public virtual void TestEnforceDeletions()
{
    Directory dir = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(
        Random, dir,
        NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))
            .SetMergeScheduler(new SerialMergeScheduler())
            // asserts below require no unexpected merges:
            .SetMergePolicy(NewLogMergePolicy(10)));

    // NOTE: cannot use writer.getReader because RIW (on
    // flipping a coin) may give us a newly opened reader,
    // but we use .reopen on this reader below and expect to
    // (must) get an NRT reader:
    DirectoryReader reader = DirectoryReader.Open(writer.IndexWriter, true);
    // same reason we don't wrap?
    IndexSearcher searcher = NewSearcher(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        reader, false);

    // add a doc, refresh the reader, and check that it's there
    Document doc = new Document();
    doc.Add(NewStringField("id", "1", Field.Store.YES));
    writer.AddDocument(doc);

    reader = RefreshReader(reader);
    searcher = NewSearcher(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        reader, false);

    TopDocs docs = searcher.Search(new MatchAllDocsQuery(), 1);
    Assert.AreEqual(1, docs.TotalHits, "Should find a hit...");

    Filter startFilter = new QueryWrapperFilter(new TermQuery(new Term("id", "1")));

    CachingWrapperFilter filter = new CachingWrapperFilter(startFilter);

    docs = searcher.Search(new MatchAllDocsQuery(), filter, 1);
    Assert.IsTrue(filter.GetSizeInBytes() > 0);

    Assert.AreEqual(1, docs.TotalHits, "[query + filter] Should find a hit...");

    Query constantScore = new ConstantScoreQuery(filter);
    docs = searcher.Search(constantScore, 1);
    Assert.AreEqual(1, docs.TotalHits, "[just filter] Should find a hit...");

    // make sure we get a cache hit when we reopen reader
    // that had no change to deletions

    // fake delete (deletes nothing):
    writer.DeleteDocuments(new Term("foo", "bar"));

    IndexReader oldReader = reader;
    reader = RefreshReader(reader);
    Assert.IsTrue(reader == oldReader);
    int missCount = filter.missCount;
    docs = searcher.Search(constantScore, 1);
    Assert.AreEqual(1, docs.TotalHits, "[just filter] Should find a hit...");

    // cache hit:
    Assert.AreEqual(missCount, filter.missCount);

    // now delete the doc, refresh the reader, and see that it's not there
    writer.DeleteDocuments(new Term("id", "1"));

    // NOTE: important to hold ref here so GC doesn't clear
    // the cache entry!  Else the assert below may sometimes
    // fail:
    oldReader = reader;
    reader = RefreshReader(reader);

    searcher = NewSearcher(reader, false);

    missCount = filter.missCount;
    docs = searcher.Search(new MatchAllDocsQuery(), filter, 1);
    Assert.AreEqual(0, docs.TotalHits, "[query + filter] Should *not* find a hit...");

    // cache hit
    Assert.AreEqual(missCount, filter.missCount);
    docs = searcher.Search(constantScore, 1);
    Assert.AreEqual(0, docs.TotalHits, "[just filter] Should *not* find a hit...");

    // apply deletes dynamically:
    filter = new CachingWrapperFilter(startFilter);
    writer.AddDocument(doc);
    reader = RefreshReader(reader);
    searcher = NewSearcher(reader, false);

    docs = searcher.Search(new MatchAllDocsQuery(), filter, 1);
    Assert.AreEqual(1, docs.TotalHits, "[query + filter] Should find a hit...");
    missCount = filter.missCount;
    Assert.IsTrue(missCount > 0);
    constantScore = new ConstantScoreQuery(filter);
    docs = searcher.Search(constantScore, 1);
    Assert.AreEqual(1, docs.TotalHits, "[just filter] Should find a hit...");
    Assert.AreEqual(missCount, filter.missCount);

    writer.AddDocument(doc);

    // NOTE: important to hold ref here so GC doesn't clear
    // the cache entry!  Else the assert below may sometimes
    // fail:
    oldReader = reader;

    reader = RefreshReader(reader);
    searcher = NewSearcher(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        reader, false);

    docs = searcher.Search(new MatchAllDocsQuery(), filter, 1);
    Assert.AreEqual(2, docs.TotalHits, "[query + filter] Should find 2 hits...");
    Assert.IsTrue(filter.missCount > missCount);
    missCount = filter.missCount;

    constantScore = new ConstantScoreQuery(filter);
    docs = searcher.Search(constantScore, 1);
    Assert.AreEqual(2, docs.TotalHits, "[just filter] Should find a hit...");
    Assert.AreEqual(missCount, filter.missCount);

    // now delete the doc, refresh the reader, and see that it's not there
    writer.DeleteDocuments(new Term("id", "1"));

    reader = RefreshReader(reader);
    searcher = NewSearcher(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        reader, false);

    docs = searcher.Search(new MatchAllDocsQuery(), filter, 1);
    Assert.AreEqual(0, docs.TotalHits, "[query + filter] Should *not* find a hit...");
    // CWF reused the same entry (it dynamically applied the deletes):
    Assert.AreEqual(missCount, filter.missCount);

    docs = searcher.Search(constantScore, 1);
    Assert.AreEqual(0, docs.TotalHits, "[just filter] Should *not* find a hit...");
    // CWF reused the same entry (it dynamically applied the deletes):
    Assert.AreEqual(missCount, filter.missCount);

    // NOTE: silliness to make sure JRE does not eliminate
    // our holding onto oldReader to prevent
    // CachingWrapperFilter's WeakHashMap from dropping the
    // entry:
    Assert.IsTrue(oldReader != null);

    reader.Dispose();
    writer.Dispose();
    dir.Dispose();
}
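// The test above calls a RefreshReader helper that is not included in this
// snippet. A minimal sketch of what it might look like, assuming it simply
// wraps DirectoryReader.OpenIfChanged (a hypothetical reconstruction, not
// the verified original):
private static DirectoryReader RefreshReader(DirectoryReader reader)
{
    DirectoryReader newReader = DirectoryReader.OpenIfChanged(reader);
    if (newReader != null)
    {
        reader.Dispose();  // a changed index yields a fresh (NRT) reader
        return newReader;
    }
    return reader;         // unchanged index: same instance, as asserted above
}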
public virtual void TestInfiniteValues()
{
    Directory dir = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(Random(), dir,
        NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));
    Document doc = new Document();
    doc.Add(new DoubleField("double", double.NegativeInfinity, Field.Store.NO));
    doc.Add(new Int64Field("long", long.MinValue, Field.Store.NO));
    writer.AddDocument(doc);

    doc = new Document();
    doc.Add(new DoubleField("double", double.PositiveInfinity, Field.Store.NO));
    doc.Add(new Int64Field("long", long.MaxValue, Field.Store.NO));
    writer.AddDocument(doc);

    doc = new Document();
    doc.Add(new DoubleField("double", 0.0, Field.Store.NO));
    doc.Add(new Int64Field("long", 0L, Field.Store.NO));
    writer.AddDocument(doc);

    foreach (double d in TestNumericUtils.DOUBLE_NANs)
    {
        doc = new Document();
        doc.Add(new DoubleField("double", d, Field.Store.NO));
        writer.AddDocument(doc);
    }

    writer.Dispose();

    IndexReader r = DirectoryReader.Open(dir);
    IndexSearcher s = NewSearcher(r);

    Query q = NumericRangeQuery.NewInt64Range("long", null, null, true, true);
    TopDocs topDocs = s.Search(q, 10);
    Assert.AreEqual(3, topDocs.ScoreDocs.Length, "Score doc count");

    q = NumericRangeQuery.NewInt64Range("long", null, null, false, false);
    topDocs = s.Search(q, 10);
    Assert.AreEqual(3, topDocs.ScoreDocs.Length, "Score doc count");

    q = NumericRangeQuery.NewInt64Range("long", long.MinValue, long.MaxValue, true, true);
    topDocs = s.Search(q, 10);
    Assert.AreEqual(3, topDocs.ScoreDocs.Length, "Score doc count");

    q = NumericRangeQuery.NewInt64Range("long", long.MinValue, long.MaxValue, false, false);
    topDocs = s.Search(q, 10);
    Assert.AreEqual(1, topDocs.ScoreDocs.Length, "Score doc count");

    q = NumericRangeQuery.NewDoubleRange("double", null, null, true, true);
    topDocs = s.Search(q, 10);
    Assert.AreEqual(3, topDocs.ScoreDocs.Length, "Score doc count");

    q = NumericRangeQuery.NewDoubleRange("double", null, null, false, false);
    topDocs = s.Search(q, 10);
    Assert.AreEqual(3, topDocs.ScoreDocs.Length, "Score doc count");

    q = NumericRangeQuery.NewDoubleRange("double", double.NegativeInfinity, double.PositiveInfinity, true, true);
    topDocs = s.Search(q, 10);
    Assert.AreEqual(3, topDocs.ScoreDocs.Length, "Score doc count");

    q = NumericRangeQuery.NewDoubleRange("double", double.NegativeInfinity, double.PositiveInfinity, false, false);
    topDocs = s.Search(q, 10);
    Assert.AreEqual(1, topDocs.ScoreDocs.Length, "Score doc count");

    q = NumericRangeQuery.NewDoubleRange("double", double.NaN, double.NaN, true, true);
    topDocs = s.Search(q, 10);
    Assert.AreEqual(TestNumericUtils.DOUBLE_NANs.Length, topDocs.ScoreDocs.Length, "Score doc count");

    r.Dispose();
    dir.Dispose();
}
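// Side note (a sketch, assuming Lucene.NET 4.8's NumericUtils naming): the
// assertions above hold because the sortable-bits encoding preserves numeric
// order, placing the two infinities at the extremes of the numeric range and
// every NaN bit pattern outside of it. That is why [-Inf..+Inf] matches
// exactly the three numeric docs while [NaN..NaN] matches only the NaN docs.
long lo = NumericUtils.DoubleToSortableInt64(double.NegativeInfinity);
long hi = NumericUtils.DoubleToSortableInt64(double.PositiveInfinity);
long nan = NumericUtils.DoubleToSortableInt64(double.NaN);
Assert.IsTrue(nan < lo || nan > hi); // NaN sorts outside the infinities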
private void CreateRandomIndexes()
{
    dir1 = NewDirectory();
    dir2 = NewDirectory();
    int numDocs = AtLeast(150);
    int numTerms = TestUtil.NextInt32(Random, 1, numDocs / 5);
    ISet<string> randomTerms = new HashSet<string>();
    while (randomTerms.Count < numTerms)
    {
        randomTerms.Add(TestUtil.RandomSimpleString(Random));
    }
    terms = new List<string>(randomTerms);
    long seed = Random.NextInt64();
    IndexWriterConfig iwc1 = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(new Random((int)seed)));
    IndexWriterConfig iwc2 = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(new Random((int)seed)));
    iwc2.SetMergePolicy(NewSortingMergePolicy(sort));
    RandomIndexWriter iw1 = new RandomIndexWriter(new Random((int)seed), dir1, iwc1);
    RandomIndexWriter iw2 = new RandomIndexWriter(new Random((int)seed), dir2, iwc2);
    for (int i = 0; i < numDocs; ++i)
    {
        if (Random.Next(5) == 0 && i != numDocs - 1)
        {
            string term = RandomPicks.RandomFrom(Random, terms);
            iw1.DeleteDocuments(new Term("s", term));
            iw2.DeleteDocuments(new Term("s", term));
        }
        Document doc = RandomDocument();
        iw1.AddDocument(doc);
        iw2.AddDocument(doc);
        if (Random.Next(8) == 0)
        {
            iw1.Commit();
            iw2.Commit();
        }
    }
    // Make sure we have something to merge
    iw1.Commit();
    iw2.Commit();
    Document doc2 = RandomDocument();
    // NOTE: don't use RIW.AddDocument directly, since it sometimes commits,
    // which may trigger a merge, in which case ForceMerge may not do anything.
    // With field updates this is a problem, since the updates can go into the
    // single segment in the index, and therefore the index won't be sorted.
    // This hurts the assumption of the test later on, that the index is sorted
    // by SortingMP.
    iw1.IndexWriter.AddDocument(doc2);
    iw2.IndexWriter.AddDocument(doc2);

    if (DefaultCodecSupportsFieldUpdates)
    {
        // update NDV of docs belonging to one term (covers many documents)
        long value = Random.NextInt64();
        string term = RandomPicks.RandomFrom(Random, terms);
        iw1.IndexWriter.UpdateNumericDocValue(new Term("s", term), "ndv", value);
        iw2.IndexWriter.UpdateNumericDocValue(new Term("s", term), "ndv", value);
    }

    iw1.ForceMerge(1);
    iw2.ForceMerge(1);
    iw1.Dispose();
    iw2.Dispose();
    reader = DirectoryReader.Open(dir1);
    sortedReader = DirectoryReader.Open(dir2);
}
public virtual void TestParsingAndSearching()
{
    string field = "content";
    string[] docs = new string[]
    {
        "\\ abcdefg1",
        "\\x00079 hijklmn1",
        "\\\\ opqrstu1"
    };

    // queries that should find all docs
    Query[] matchAll = new Query[]
    {
        new WildcardQuery(new Term(field, "*")),
        new WildcardQuery(new Term(field, "*1")),
        new WildcardQuery(new Term(field, "**1")),
        new WildcardQuery(new Term(field, "*?")),
        new WildcardQuery(new Term(field, "*?1")),
        new WildcardQuery(new Term(field, "?*1")),
        new WildcardQuery(new Term(field, "**")),
        new WildcardQuery(new Term(field, "***")),
        new WildcardQuery(new Term(field, "\\\\*"))
    };

    // queries that should find no docs
    Query[] matchNone = new Query[]
    {
        new WildcardQuery(new Term(field, "a*h")),
        new WildcardQuery(new Term(field, "a?h")),
        new WildcardQuery(new Term(field, "*a*h")),
        new WildcardQuery(new Term(field, "?a")),
        new WildcardQuery(new Term(field, "a?"))
    };

    PrefixQuery[][] matchOneDocPrefix = new PrefixQuery[][]
    {
        new PrefixQuery[] { new PrefixQuery(new Term(field, "a")), new PrefixQuery(new Term(field, "ab")), new PrefixQuery(new Term(field, "abc")) },
        new PrefixQuery[] { new PrefixQuery(new Term(field, "h")), new PrefixQuery(new Term(field, "hi")), new PrefixQuery(new Term(field, "hij")), new PrefixQuery(new Term(field, "\\x0007")) },
        new PrefixQuery[] { new PrefixQuery(new Term(field, "o")), new PrefixQuery(new Term(field, "op")), new PrefixQuery(new Term(field, "opq")), new PrefixQuery(new Term(field, "\\\\")) }
    };

    WildcardQuery[][] matchOneDocWild = new WildcardQuery[][]
    {
        new WildcardQuery[] { new WildcardQuery(new Term(field, "*a*")), new WildcardQuery(new Term(field, "*ab*")), new WildcardQuery(new Term(field, "*abc**")), new WildcardQuery(new Term(field, "ab*e*")), new WildcardQuery(new Term(field, "*g?")), new WildcardQuery(new Term(field, "*f?1")) },
        new WildcardQuery[] { new WildcardQuery(new Term(field, "*h*")), new WildcardQuery(new Term(field, "*hi*")), new WildcardQuery(new Term(field, "*hij**")), new WildcardQuery(new Term(field, "hi*k*")), new WildcardQuery(new Term(field, "*n?")), new WildcardQuery(new Term(field, "*m?1")), new WildcardQuery(new Term(field, "hij**")) },
        new WildcardQuery[] { new WildcardQuery(new Term(field, "*o*")), new WildcardQuery(new Term(field, "*op*")), new WildcardQuery(new Term(field, "*opq**")), new WildcardQuery(new Term(field, "op*q*")), new WildcardQuery(new Term(field, "*u?")), new WildcardQuery(new Term(field, "*t?1")), new WildcardQuery(new Term(field, "opq**")) }
    };

    // prepare the index
    Directory dir = NewDirectory();
    RandomIndexWriter iw = new RandomIndexWriter(Random(), dir,
        NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NewLogMergePolicy()));
    for (int i = 0; i < docs.Length; i++)
    {
        Document doc = new Document();
        doc.Add(NewTextField(field, docs[i], Field.Store.NO));
        iw.AddDocument(doc);
    }
    iw.Dispose();

    IndexReader reader = DirectoryReader.Open(dir);
    IndexSearcher searcher = NewSearcher(reader);

    // test queries that must find all
    foreach (Query q in matchAll)
    {
        if (VERBOSE)
        {
            Console.WriteLine("matchAll: q=" + q + " " + q.GetType().Name);
        }
        ScoreDoc[] hits = searcher.Search(q, null, 1000).ScoreDocs;
        Assert.AreEqual(docs.Length, hits.Length);
    }

    // test queries that must find none
    foreach (Query q in matchNone)
    {
        if (VERBOSE)
        {
            Console.WriteLine("matchNone: q=" + q + " " + q.GetType().Name);
        }
        ScoreDoc[] hits = searcher.Search(q, null, 1000).ScoreDocs;
        Assert.AreEqual(0, hits.Length);
    }

    // test that the prefix queries find only one doc
    for (int i = 0; i < matchOneDocPrefix.Length; i++)
    {
        for (int j = 0; j < matchOneDocPrefix[i].Length; j++)
        {
            Query q = matchOneDocPrefix[i][j];
            if (VERBOSE)
            {
                Console.WriteLine("match 1 prefix: doc=" + docs[i] + " q=" + q + " " + q.GetType().Name);
            }
            ScoreDoc[] hits = searcher.Search(q, null, 1000).ScoreDocs;
            Assert.AreEqual(1, hits.Length);
            Assert.AreEqual(i, hits[0].Doc);
        }
    }

    // test that the wildcard queries find only one doc
    for (int i = 0; i < matchOneDocWild.Length; i++)
    {
        for (int j = 0; j < matchOneDocWild[i].Length; j++)
        {
            Query q = matchOneDocWild[i][j];
            if (VERBOSE)
            {
                Console.WriteLine("match 1 wild: doc=" + docs[i] + " q=" + q + " " + q.GetType().Name);
            }
            ScoreDoc[] hits = searcher.Search(q, null, 1000).ScoreDocs;
            Assert.AreEqual(1, hits.Length);
            Assert.AreEqual(i, hits[0].Doc);
        }
    }

    reader.Dispose();
    dir.Dispose();
}
public override void BeforeClass()
{
    base.BeforeClass();

    Directory = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(Random, Directory,
        NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetMergePolicy(NewLogMergePolicy()));
    for (int i = 0; i < DocFields.Length; i++)
    {
        Document doc = new Document();
        doc.Add(NewTextField(field, DocFields[i], Field.Store.NO));
        writer.AddDocument(doc);
    }
    writer.Dispose();
    LittleReader = DirectoryReader.Open(Directory);
    Searcher = NewSearcher(LittleReader);
    // this is intentionally using the baseline sim, because it compares against bigSearcher (which uses a random one)
    Searcher.Similarity = new DefaultSimilarity();

    // Make big index
    Dir2 = new MockDirectoryWrapper(Random, new RAMDirectory(Directory, IOContext.DEFAULT));

    // First multiply small test index:
    MulFactor = 1;
    int docCount = 0;
    if (VERBOSE)
    {
        Console.WriteLine("\nTEST: now copy index...");
    }
    do
    {
        if (VERBOSE)
        {
            Console.WriteLine("\nTEST: cycle...");
        }
        Directory copy = new MockDirectoryWrapper(Random, new RAMDirectory(Dir2, IOContext.DEFAULT));
        RandomIndexWriter w = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
            this,
#endif
            Random, Dir2);
        w.AddIndexes(copy);
        docCount = w.MaxDoc;
        w.Dispose();
        MulFactor *= 2;
    } while (docCount < 3000);

    RandomIndexWriter riw = new RandomIndexWriter(Random, Dir2,
        NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetMaxBufferedDocs(TestUtil.NextInt32(Random, 50, 1000)));
    Document doc_ = new Document();
    doc_.Add(NewTextField("field2", "xxx", Field.Store.NO));
    for (int i = 0; i < NUM_EXTRA_DOCS / 2; i++)
    {
        riw.AddDocument(doc_);
    }
    doc_ = new Document();
    doc_.Add(NewTextField("field2", "big bad bug", Field.Store.NO));
    for (int i = 0; i < NUM_EXTRA_DOCS / 2; i++)
    {
        riw.AddDocument(doc_);
    }
    Reader = riw.GetReader();
    BigSearcher = NewSearcher(Reader);
    riw.Dispose();
}
/// <summary>
/// Build the suggest index, using up to the specified
/// amount of temporary RAM while building.  Note that
/// the weights for the suggestions are ignored.
/// </summary>
public virtual void Build(IInputIterator iterator, double ramBufferSizeMB)
{
    if (iterator.HasPayloads)
    {
        throw new System.ArgumentException("this suggester doesn't support payloads");
    }
    if (iterator.HasContexts)
    {
        throw new System.ArgumentException("this suggester doesn't support contexts");
    }

    string prefix = this.GetType().Name;
    var directory = OfflineSorter.DefaultTempDir();

    // TODO: messy ... java7 has Files.createTempDirectory
    // ... but 4.x is java6:
    DirectoryInfo tempIndexPath = null;
    Random random = new Random();
    while (true)
    {
        tempIndexPath = new DirectoryInfo(Path.Combine(directory.FullName, prefix + ".index." + random.Next(int.MaxValue)));
        tempIndexPath.Create();
        if (System.IO.Directory.Exists(tempIndexPath.FullName))
        {
            break;
        }
    }

    using (Directory dir = FSDirectory.Open(tempIndexPath))
    {
#pragma warning disable 612, 618
        IndexWriterConfig iwc = new IndexWriterConfig(LuceneVersion.LUCENE_CURRENT, indexAnalyzer);
#pragma warning restore 612, 618
        iwc.SetOpenMode(OpenMode.CREATE);
        iwc.SetRAMBufferSizeMB(ramBufferSizeMB);
        IndexWriter writer = new IndexWriter(dir, iwc);

        var ft = new FieldType(TextField.TYPE_NOT_STORED);
        // TODO: if only we had IndexOptions.TERMS_ONLY...
        ft.IndexOptions = IndexOptions.DOCS_AND_FREQS;
        ft.OmitNorms = true;
        ft.Freeze();

        Document doc = new Document();
        Field field = new Field("body", "", ft);
        doc.Add(field);

        totTokens = 0;
        IndexReader reader = null;

        bool success = false;
        count = 0;
        try
        {
            while (true)
            {
                BytesRef surfaceForm = iterator.Next();
                if (surfaceForm == null)
                {
                    break;
                }
                field.SetStringValue(surfaceForm.Utf8ToString());
                writer.AddDocument(doc);
                count++;
            }

            reader = DirectoryReader.Open(writer, false);

            Terms terms = MultiFields.GetTerms(reader, "body");
            if (terms == null)
            {
                throw new System.ArgumentException("need at least one suggestion");
            }

            // Move all ngrams into an FST:
            TermsEnum termsEnum = terms.GetIterator(null);
            Outputs<long?> outputs = PositiveInt32Outputs.Singleton;
            Builder<long?> builder = new Builder<long?>(FST.INPUT_TYPE.BYTE1, outputs);

            Int32sRef scratchInts = new Int32sRef();
            while (true)
            {
                BytesRef term = termsEnum.Next();
                if (term == null)
                {
                    break;
                }
                int ngramCount = CountGrams(term);
                if (ngramCount > grams)
                {
                    throw new System.ArgumentException("tokens must not contain separator byte; got token=" + term + " but gramCount=" + ngramCount + ", which is greater than expected max ngram size=" + grams);
                }
                if (ngramCount == 1)
                {
                    totTokens += termsEnum.TotalTermFreq;
                }
                builder.Add(Lucene.Net.Util.Fst.Util.ToInt32sRef(term, scratchInts), EncodeWeight(termsEnum.TotalTermFreq));
            }

            fst = builder.Finish();
            if (fst == null)
            {
                throw new System.ArgumentException("need at least one suggestion");
            }
            //System.out.println("FST: " + fst.getNodeCount() + " nodes");

            /*
             * PrintWriter pw = new PrintWriter("/x/tmp/out.dot");
             * Util.toDot(fst, pw, true, true);
             * pw.close();
             */

            success = true;
        }
        finally
        {
            try
            {
                if (success)
                {
                    IOUtils.Close(writer, reader);
                }
                else
                {
                    IOUtils.CloseWhileHandlingException(writer, reader);
                }
            }
            finally
            {
                foreach (string file in dir.ListAll())
                {
                    FileInfo path = new FileInfo(Path.Combine(tempIndexPath.FullName, file));
                    try
                    {
                        path.Delete();
                    }
                    catch (Exception e)
                    {
                        throw new InvalidOperationException("failed to remove " + path, e);
                    }
                }

                try
                {
                    tempIndexPath.Delete();
                }
                catch (Exception e)
                {
                    throw new InvalidOperationException("failed to remove " + tempIndexPath, e);
                }
            }
        }
    }
}
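// Hedged usage sketch (not from the original source): this Build overload
// matches FreeTextSuggester's signature; assuming that class, a caller could
// feed it the terms of an existing index via LuceneDictionary, whose entry
// iterator carries neither payloads nor contexts and therefore passes the
// guards above. The helper name and paths are illustrative only.
private static void BuildSuggesterFromIndex(DirectoryInfo indexPath)
{
    using (Directory dir = FSDirectory.Open(indexPath))
    using (IndexReader reader = DirectoryReader.Open(dir))
    {
        var suggester = new FreeTextSuggester(new StandardAnalyzer(LuceneVersion.LUCENE_48));
        // Cap the temporary index at 256 MB of indexing RAM while building.
        suggester.Build(new LuceneDictionary(reader, "body").GetEntryIterator(), 256.0);
    }
}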
/// <summary>
/// Test using various international locales with accented characters (which
/// sort differently depending on locale).
/// </summary>
// Copied (and slightly modified) from
// Lucene.Net.Search.TestSort.TestInternationalSort()
//
// TODO: this test is really fragile. there are already 3 different cases,
// depending upon unicode version.
public virtual void TestCollationKeySort(Analyzer usAnalyzer, Analyzer franceAnalyzer, Analyzer swedenAnalyzer, Analyzer denmarkAnalyzer,
    string usResult, string frResult, string svResult, string dkResult)
{
    using (Directory indexStore = NewDirectory())
    {
        using (IndexWriter writer = new IndexWriter(indexStore,
            new IndexWriterConfig(LuceneTestCase.TEST_VERSION_CURRENT, new MockAnalyzer(Random, MockTokenizer.WHITESPACE, false))))
        {
            // document data:
            // the tracer field is used to determine which document was hit
            string[][] sortData = new string[][]
            {
                new string[] { "A", "x", "p\u00EAche", "p\u00EAche", "p\u00EAche", "p\u00EAche" },
                new string[] { "B", "y", "HAT", "HAT", "HAT", "HAT" },
                new string[] { "C", "x", "p\u00E9ch\u00E9", "p\u00E9ch\u00E9", "p\u00E9ch\u00E9", "p\u00E9ch\u00E9" },
                new string[] { "D", "y", "HUT", "HUT", "HUT", "HUT" },
                new string[] { "E", "x", "peach", "peach", "peach", "peach" },
                new string[] { "F", "y", "H\u00C5T", "H\u00C5T", "H\u00C5T", "H\u00C5T" },
                new string[] { "G", "x", "sin", "sin", "sin", "sin" },
                new string[] { "H", "y", "H\u00D8T", "H\u00D8T", "H\u00D8T", "H\u00D8T" },
                new string[] { "I", "x", "s\u00EDn", "s\u00EDn", "s\u00EDn", "s\u00EDn" },
                new string[] { "J", "y", "HOT", "HOT", "HOT", "HOT" }
            };

            FieldType customType = new FieldType();
            customType.IsStored = true;

            for (int i = 0; i < sortData.Length; ++i)
            {
                Document doc = new Document();
                doc.Add(new Field("tracer", sortData[i][0], customType));
                doc.Add(new TextField("contents", sortData[i][1], Field.Store.NO));
                if (sortData[i][2] != null)
                {
                    doc.Add(new TextField("US", usAnalyzer.GetTokenStream("US", new StringReader(sortData[i][2]))));
                }
                if (sortData[i][3] != null)
                {
                    doc.Add(new TextField("France", franceAnalyzer.GetTokenStream("France", new StringReader(sortData[i][3]))));
                }
                if (sortData[i][4] != null)
                {
                    doc.Add(new TextField("Sweden", swedenAnalyzer.GetTokenStream("Sweden", new StringReader(sortData[i][4]))));
                }
                if (sortData[i][5] != null)
                {
                    doc.Add(new TextField("Denmark", denmarkAnalyzer.GetTokenStream("Denmark", new StringReader(sortData[i][5]))));
                }
                writer.AddDocument(doc);
            }
            writer.ForceMerge(1);
        } // writer.Dispose();

        using (IndexReader reader = DirectoryReader.Open(indexStore))
        {
            IndexSearcher searcher = new IndexSearcher(reader);

            Sort sort = new Sort();
            Search.Query queryX = new TermQuery(new Term("contents", "x"));
            Search.Query queryY = new TermQuery(new Term("contents", "y"));

            sort.SetSort(new SortField("US", SortFieldType.STRING));
            this.AssertMatches(searcher, queryY, sort, usResult);

            sort.SetSort(new SortField("France", SortFieldType.STRING));
            this.AssertMatches(searcher, queryX, sort, frResult);

            sort.SetSort(new SortField("Sweden", SortFieldType.STRING));
            this.AssertMatches(searcher, queryY, sort, svResult);

            sort.SetSort(new SortField("Denmark", SortFieldType.STRING));
            this.AssertMatches(searcher, queryY, sort, dkResult);
        } // reader.Dispose();
    } // indexStore.Dispose();
}
private void matchedFieldsTestCase(bool useMatchedFields, bool fieldMatch, String fieldValue, String expected, params Query[] queryClauses)
{
    Document doc = new Document();
    FieldType stored = new FieldType(TextField.TYPE_STORED);
    stored.StoreTermVectorOffsets = true;
    stored.StoreTermVectorPositions = true;
    stored.StoreTermVectors = true;
    stored.Freeze();
    FieldType matched = new FieldType(TextField.TYPE_NOT_STORED);
    matched.StoreTermVectorOffsets = true;
    matched.StoreTermVectorPositions = true;
    matched.StoreTermVectors = true;
    matched.Freeze();
    doc.Add(new Field("field", fieldValue, stored));              // Whitespace tokenized with English stop words
    doc.Add(new Field("field_exact", fieldValue, matched));       // Whitespace tokenized without stop words
    doc.Add(new Field("field_super_exact", fieldValue, matched)); // Whitespace tokenized without toLower
    doc.Add(new Field("field_characters", fieldValue, matched));  // Each letter is a token
    doc.Add(new Field("field_tripples", fieldValue, matched));    // Every three letters is a token
    doc.Add(new Field("field_sliced", fieldValue.Substring(0,     // Sliced at 10 chars then analyzed just like field
        Math.Min(fieldValue.Length - 1, 10)), matched));
    doc.Add(new Field("field_der_red", new CannedTokenStream(     // Hacky field containing "der" and "red" at pos = 0
        token("der", 1, 0, 3),
        token("red", 0, 0, 3)), matched));

    Analyzer analyzer = new AnalyzerWrapperAnonymousHelper();
    Directory dir = NewDirectory();
    IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
    writer.AddDocument(doc);

    FastVectorHighlighter highlighter = new FastVectorHighlighter();
    IFragListBuilder fragListBuilder = new SimpleFragListBuilder();
    IFragmentsBuilder fragmentsBuilder = new ScoreOrderFragmentsBuilder();
    IndexReader reader = DirectoryReader.Open(writer, true);
    String[] preTags = new String[] { "<b>" };
    String[] postTags = new String[] { "</b>" };
    IEncoder encoder = new DefaultEncoder();
    int docId = 0;
    BooleanQuery query = new BooleanQuery();
    foreach (Query clause in queryClauses)
    {
        query.Add(clause, Occur.MUST);
    }
    FieldQuery fieldQuery = new FieldQuery(query, reader, true, fieldMatch);
    String[] bestFragments;
    if (useMatchedFields)
    {
        ISet<String> matchedFields = new JCG.HashSet<String>();
        matchedFields.Add("field");
        matchedFields.Add("field_exact");
        matchedFields.Add("field_super_exact");
        matchedFields.Add("field_characters");
        matchedFields.Add("field_tripples");
        matchedFields.Add("field_sliced");
        matchedFields.Add("field_der_red");
        bestFragments = highlighter.GetBestFragments(fieldQuery, reader, docId, "field", matchedFields, 25, 1,
            fragListBuilder, fragmentsBuilder, preTags, postTags, encoder);
    }
    else
    {
        bestFragments = highlighter.GetBestFragments(fieldQuery, reader, docId, "field", 25, 1,
            fragListBuilder, fragmentsBuilder, preTags, postTags, encoder);
    }
    assertEquals(expected, bestFragments[0]);

    reader.Dispose();
    writer.Dispose();
    dir.Dispose();
}
private void DoTest(FieldInfo.DocValuesType_e type)
{
    Directory d = NewDirectory();
    IndexWriterConfig iwConfig = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
    int nDocs = AtLeast(50);
    Field id = new NumericDocValuesField("id", 0);
    Field f;
    switch (type)
    {
        case FieldInfo.DocValuesType_e.BINARY:
            f = new BinaryDocValuesField("dv", new BytesRef());
            break;

        case FieldInfo.DocValuesType_e.SORTED:
            f = new SortedDocValuesField("dv", new BytesRef());
            break;

        case FieldInfo.DocValuesType_e.NUMERIC:
            f = new NumericDocValuesField("dv", 0);
            break;

        default:
            throw new InvalidOperationException();
    }
    Document document = new Document();
    document.Add(id);
    document.Add(f);

    object[] vals = new object[nDocs];

    RandomIndexWriter iw = new RandomIndexWriter(Random(), d, iwConfig);
    for (int i = 0; i < nDocs; ++i)
    {
        id.LongValue = i;
        switch (type)
        {
            case FieldInfo.DocValuesType_e.SORTED:
            case FieldInfo.DocValuesType_e.BINARY:
                do
                {
                    vals[i] = TestUtil.RandomSimpleString(Random(), 20);
                } while (((string)vals[i]).Length == 0);
                f.BytesValue = new BytesRef((string)vals[i]);
                break;

            case FieldInfo.DocValuesType_e.NUMERIC:
                int bitsPerValue = Random().NextIntBetween(1, 31); // keep it an int
                vals[i] = (long)Random().Next((int)PackedInts.MaxValue(bitsPerValue));
                f.LongValue = (long)vals[i];
                break;
        }
        iw.AddDocument(document);
        if (Random().NextBoolean() && i % 10 == 9)
        {
            iw.Commit();
        }
    }
    iw.Dispose();

    DirectoryReader rd = DirectoryReader.Open(d);
    foreach (AtomicReaderContext leave in rd.Leaves)
    {
        FunctionValues ids = (new LongFieldSource("id")).GetValues(null, leave);
        ValueSource vs;
        switch (type)
        {
            case FieldInfo.DocValuesType_e.BINARY:
            case FieldInfo.DocValuesType_e.SORTED:
                vs = new BytesRefFieldSource("dv");
                break;

            case FieldInfo.DocValuesType_e.NUMERIC:
                vs = new LongFieldSource("dv");
                break;

            default:
                throw new InvalidOperationException();
        }
        FunctionValues values = vs.GetValues(null, leave);
        BytesRef bytes = new BytesRef();
        for (int i = 0; i < leave.AtomicReader.MaxDoc; ++i)
        {
            assertTrue(values.Exists(i));
            if (vs is BytesRefFieldSource)
            {
                assertTrue(values.ObjectVal(i) is string);
            }
            else if (vs is LongFieldSource)
            {
                assertTrue(values.ObjectVal(i) is long?);
                assertTrue(values.BytesVal(i, bytes));
            }
            else
            {
                throw new InvalidOperationException();
            }

            object expected = vals[ids.IntVal(i)];
            switch (type)
            {
                case FieldInfo.DocValuesType_e.SORTED:
                    values.OrdVal(i); // no exception
                    assertTrue(values.NumOrd() >= 1);
                    goto case FieldInfo.DocValuesType_e.BINARY;

                case FieldInfo.DocValuesType_e.BINARY:
                    assertEquals(expected, values.ObjectVal(i));
                    assertEquals(expected, values.StrVal(i));
                    assertEquals(expected, values.ObjectVal(i));
                    assertEquals(expected, values.StrVal(i));
                    assertTrue(values.BytesVal(i, bytes));
                    assertEquals(new BytesRef((string)expected), bytes);
                    break;

                case FieldInfo.DocValuesType_e.NUMERIC:
                    assertEquals(Number.ToInt64(expected.ToString()), values.LongVal(i));
                    break;
            }
        }
    }
    rd.Dispose();
    d.Dispose();
}
public void TestPhraseHighlightTest()
{
    Directory dir = NewDirectory();
    IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)));
    Document doc = new Document();
    FieldType type = new FieldType(TextField.TYPE_STORED);
    type.StoreTermVectorOffsets = true;
    type.StoreTermVectorPositions = true;
    type.StoreTermVectors = true;
    type.Freeze();
    Field longTermField = new Field("long_term", "This is a test thisisaverylongwordandmakessurethisfails where foo is highlighed and should be highlighted", type);
    Field noLongTermField = new Field("no_long_term", "This is a test where foo is highlighed and should be highlighted", type);
    doc.Add(longTermField);
    doc.Add(noLongTermField);
    writer.AddDocument(doc);
    FastVectorHighlighter highlighter = new FastVectorHighlighter();
    IndexReader reader = DirectoryReader.Open(writer, true);
    int docId = 0;
    String field = "no_long_term";
    {
        BooleanQuery query = new BooleanQuery();
        query.Add(new TermQuery(new Term(field, "test")), Occur.MUST);
        query.Add(new TermQuery(new Term(field, "foo")), Occur.MUST);
        query.Add(new TermQuery(new Term(field, "highlighed")), Occur.MUST);
        FieldQuery fieldQuery = highlighter.GetFieldQuery(query, reader);
        String[] bestFragments = highlighter.GetBestFragments(fieldQuery, reader, docId, field, 18, 1);
        // highlighted results are centered
        assertEquals(1, bestFragments.Length);
        assertEquals("<b>foo</b> is <b>highlighed</b> and", bestFragments[0]);
    }
    {
        BooleanQuery query = new BooleanQuery();
        PhraseQuery pq = new PhraseQuery();
        pq.Add(new Term(field, "test"));
        pq.Add(new Term(field, "foo"));
        pq.Add(new Term(field, "highlighed"));
        pq.Slop = 5;
        query.Add(new TermQuery(new Term(field, "foo")), Occur.MUST);
        query.Add(pq, Occur.MUST);
        query.Add(new TermQuery(new Term(field, "highlighed")), Occur.MUST);
        FieldQuery fieldQuery = highlighter.GetFieldQuery(query, reader);
        String[] bestFragments = highlighter.GetBestFragments(fieldQuery, reader, docId, field, 18, 1);
        // highlighted results are centered
        assertEquals(0, bestFragments.Length);
        bestFragments = highlighter.GetBestFragments(fieldQuery, reader, docId, field, 30, 1);
        // highlighted results are centered
        assertEquals(1, bestFragments.Length);
        assertEquals("a <b>test</b> where <b>foo</b> is <b>highlighed</b> and", bestFragments[0]);
    }
    {
        PhraseQuery query = new PhraseQuery();
        query.Add(new Term(field, "test"));
        query.Add(new Term(field, "foo"));
        query.Add(new Term(field, "highlighed"));
        query.Slop = 3;
        FieldQuery fieldQuery = highlighter.GetFieldQuery(query, reader);
        String[] bestFragments = highlighter.GetBestFragments(fieldQuery, reader, docId, field, 18, 1);
        // highlighted results are centered
        assertEquals(0, bestFragments.Length);
        bestFragments = highlighter.GetBestFragments(fieldQuery, reader, docId, field, 30, 1);
        // highlighted results are centered
        assertEquals(1, bestFragments.Length);
        assertEquals("a <b>test</b> where <b>foo</b> is <b>highlighed</b> and", bestFragments[0]);
    }
    {
        PhraseQuery query = new PhraseQuery();
        query.Add(new Term(field, "test"));
        query.Add(new Term(field, "foo"));
        query.Add(new Term(field, "highlighted"));
        query.Slop = 30;
        FieldQuery fieldQuery = highlighter.GetFieldQuery(query, reader);
        String[] bestFragments = highlighter.GetBestFragments(fieldQuery, reader, docId, field, 18, 1);
        assertEquals(0, bestFragments.Length);
    }
    {
        BooleanQuery query = new BooleanQuery();
        PhraseQuery pq = new PhraseQuery();
        pq.Add(new Term(field, "test"));
        pq.Add(new Term(field, "foo"));
        pq.Add(new Term(field, "highlighed"));
        pq.Slop = 5;
        BooleanQuery inner = new BooleanQuery();
        inner.Add(pq, Occur.MUST);
        inner.Add(new TermQuery(new Term(field, "foo")), Occur.MUST);
        query.Add(inner, Occur.MUST);
        query.Add(pq, Occur.MUST);
        query.Add(new TermQuery(new Term(field, "highlighed")), Occur.MUST);
        FieldQuery fieldQuery = highlighter.GetFieldQuery(query, reader);
        String[] bestFragments = highlighter.GetBestFragments(fieldQuery, reader, docId, field, 18, 1);
        assertEquals(0, bestFragments.Length);
        bestFragments = highlighter.GetBestFragments(fieldQuery, reader, docId, field, 30, 1);
        // highlighted results are centered
        assertEquals(1, bestFragments.Length);
        assertEquals("a <b>test</b> where <b>foo</b> is <b>highlighed</b> and", bestFragments[0]);
    }
    field = "long_term";
    {
        BooleanQuery query = new BooleanQuery();
        query.Add(new TermQuery(new Term(field, "thisisaverylongwordandmakessurethisfails")), Occur.MUST);
        query.Add(new TermQuery(new Term(field, "foo")), Occur.MUST);
        query.Add(new TermQuery(new Term(field, "highlighed")), Occur.MUST);
        FieldQuery fieldQuery = highlighter.GetFieldQuery(query, reader);
        String[] bestFragments = highlighter.GetBestFragments(fieldQuery, reader, docId, field, 18, 1);
        // highlighted results are centered
        assertEquals(1, bestFragments.Length);
        assertEquals("<b>thisisaverylongwordandmakessurethisfails</b>", bestFragments[0]);
    }
    reader.Dispose();
    writer.Dispose();
    dir.Dispose();
}
public void TestBooleanPhraseWithSynonym()
{
    Directory dir = NewDirectory();
    IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)));
    Document doc = new Document();
    FieldType type = new FieldType(TextField.TYPE_NOT_STORED);
    type.StoreTermVectorOffsets = true;
    type.StoreTermVectorPositions = true;
    type.StoreTermVectors = true;
    type.Freeze();
    Token syn = new Token("httpwwwfacebookcom", 6, 29);
    syn.PositionIncrement = 0;
    CannedTokenStream ts = new CannedTokenStream(
        new Token("test", 0, 4),
        new Token("http", 6, 10),
        syn,
        new Token("www", 13, 16),
        new Token("facebook", 17, 25),
        new Token("com", 26, 29));
    Field field = new Field("field", ts, type);
    doc.Add(field);
    doc.Add(new StoredField("field", "Test: http://www.facebook.com"));
    writer.AddDocument(doc);
    FastVectorHighlighter highlighter = new FastVectorHighlighter();

    IndexReader reader = DirectoryReader.Open(writer, true);
    int docId = 0;

    // query1: match
    PhraseQuery pq = new PhraseQuery();
    pq.Add(new Term("field", "test"));
    pq.Add(new Term("field", "http"));
    pq.Add(new Term("field", "www"));
    pq.Add(new Term("field", "facebook"));
    pq.Add(new Term("field", "com"));
    FieldQuery fieldQuery = highlighter.GetFieldQuery(pq, reader);
    String[] bestFragments = highlighter.GetBestFragments(fieldQuery, reader, docId, "field", 54, 1);
    assertEquals("<b>Test: http://www.facebook.com</b>", bestFragments[0]);

    // query2: match
    PhraseQuery pq2 = new PhraseQuery();
    pq2.Add(new Term("field", "test"));
    pq2.Add(new Term("field", "httpwwwfacebookcom"));
    pq2.Add(new Term("field", "www"));
    pq2.Add(new Term("field", "facebook"));
    pq2.Add(new Term("field", "com"));
    fieldQuery = highlighter.GetFieldQuery(pq2, reader);
    bestFragments = highlighter.GetBestFragments(fieldQuery, reader, docId, "field", 54, 1);
    assertEquals("<b>Test: http://www.facebook.com</b>", bestFragments[0]);

    // query3: OR query1 and query2 together
    BooleanQuery bq = new BooleanQuery();
    bq.Add(pq, Occur.SHOULD);
    bq.Add(pq2, Occur.SHOULD);
    fieldQuery = highlighter.GetFieldQuery(bq, reader);
    bestFragments = highlighter.GetBestFragments(fieldQuery, reader, docId, "field", 54, 1);
    assertEquals("<b>Test: http://www.facebook.com</b>", bestFragments[0]);

    reader.Dispose();
    writer.Dispose();
    dir.Dispose();
}
private void BuildDictionaries()
{
    var propertyIdDomainsDictList = new Dictionary<int, HashSet<int>>();
    var propertyIdRangesDictList = new Dictionary<int, HashSet<int>>();

    var logger = Logger.Logger.Init();
    logger.Info("Building Inverted Properties Domain and Range Dictionary");

    using (var luceneDirectory = FSDirectory.Open(_propertiesIndexPath))
    {
        using var luceneDirectoryReader = DirectoryReader.Open(luceneDirectory);
        var docCount = luceneDirectoryReader.MaxDoc;
        for (var i = 0; i < docCount; i++)
        {
            var doc = luceneDirectoryReader.Document(i);
            var property = doc.MapProperty();
            propertyIdDomainsDictList.AddSafe(property.Id.ToInt(), property.Domain);
            propertyIdRangesDictList.AddSafe(property.Id.ToInt(), property.Range);
        }
    }

    _propertyIdDomainTypesDictionary = propertyIdDomainsDictList.ToArrayDictionary();
    _propertyIdRangeTypesDictionary = propertyIdRangesDictList.ToArrayDictionary();
    _typeIdDomainPropertiesDictionary = _propertyIdDomainTypesDictionary.InvertDictionary();
    _typeIdRangePropertiesDictionary = _propertyIdRangeTypesDictionary.InvertDictionary();

    var propertyDomainOutgoingPropertiesIds = new Dictionary<int, HashSet<int>>();
    var propertyDomainIncomingPropertiesIds = new Dictionary<int, HashSet<int>>();
    var propertyRangeOutgoingPropertiesIds = new Dictionary<int, HashSet<int>>();
    var propertyRangeIncomingPropertiesIds = new Dictionary<int, HashSet<int>>();

    foreach (var propertyId in _propertyIdDomainTypesDictionary.Select(x => x.Key))
    {
        propertyDomainOutgoingPropertiesIds[propertyId] = new HashSet<int>();
        propertyDomainIncomingPropertiesIds[propertyId] = new HashSet<int>();
        if (!_propertyIdDomainTypesDictionary.ContainsKey(propertyId))
        {
            continue;
        }
        var domainIds = _propertyIdDomainTypesDictionary[propertyId];
        foreach (var domainId in domainIds)
        {
            if (_typeIdDomainPropertiesDictionary.ContainsKey(domainId))
            {
                var domainProperties = _typeIdDomainPropertiesDictionary[domainId];
                propertyDomainOutgoingPropertiesIds[propertyId].AddAll(domainProperties);
            }
            if (_typeIdRangePropertiesDictionary.ContainsKey(domainId))
            {
                var rangeProperties = _typeIdRangePropertiesDictionary[domainId];
                propertyDomainIncomingPropertiesIds[propertyId].AddAll(rangeProperties);
            }
        }
    }

    foreach (var propertyId in _propertyIdRangeTypesDictionary.Select(x => x.Key))
    {
        propertyRangeOutgoingPropertiesIds[propertyId] = new HashSet<int>();
        propertyRangeIncomingPropertiesIds[propertyId] = new HashSet<int>();
        if (!_propertyIdRangeTypesDictionary.ContainsKey(propertyId))
        {
            continue;
        }
        var rangeIds = _propertyIdRangeTypesDictionary[propertyId];
        foreach (var rangeId in rangeIds)
        {
            if (_typeIdDomainPropertiesDictionary.ContainsKey(rangeId))
            {
                var domainProperties = _typeIdDomainPropertiesDictionary[rangeId];
                propertyRangeOutgoingPropertiesIds[propertyId].AddAll(domainProperties);
            }
            if (_typeIdRangePropertiesDictionary.ContainsKey(rangeId))
            {
                var rangeProperties = _typeIdRangePropertiesDictionary[rangeId];
                propertyRangeIncomingPropertiesIds[propertyId].AddAll(rangeProperties);
            }
        }
    }

    _propertyDomainOutgoingPropertiesIds = propertyDomainOutgoingPropertiesIds.ToArrayDictionary();
    _propertyDomainIncomingPropertiesIds = propertyDomainIncomingPropertiesIds.ToArrayDictionary();
    _propertyRangeOutgoingPropertiesIds = propertyRangeOutgoingPropertiesIds.ToArrayDictionary();
    _propertyRangeIncomingPropertiesIds = propertyRangeIncomingPropertiesIds.ToArrayDictionary();

    logger.Info("InMemory Domain Range Query Engine Complete");
}
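// The inversion above relies on project-specific extension helpers (AddSafe,
// ToArrayDictionary, InvertDictionary, AddAll) that are not shown here. A
// minimal sketch of what InvertDictionary might do, assuming it maps each
// value back to the set of keys that reference it (hypothetical helper for
// illustration only; requires System.Linq):
private static Dictionary<int, int[]> InvertDictionary(Dictionary<int, int[]> source)
{
    // propertyId -> [typeIds] becomes typeId -> [propertyIds]
    var inverted = new Dictionary<int, HashSet<int>>();
    foreach (var pair in source)
    {
        foreach (var value in pair.Value)
        {
            if (!inverted.TryGetValue(value, out var keys))
            {
                inverted[value] = keys = new HashSet<int>();
            }
            keys.Add(pair.Key);
        }
    }
    return inverted.ToDictionary(x => x.Key, x => x.Value.ToArray());
}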
public void TestClose()
{
    using (IndexReader r = DirectoryReader.Open(userindex))
    {
        spellChecker.ClearIndex();
        string field = "field1";
        Addwords(r, spellChecker, "field1");
        int num_field1 = this.NumDoc();
        Addwords(r, spellChecker, "field2");
        int num_field2 = this.NumDoc();
        assertEquals(num_field2, num_field1 + 1);
        CheckCommonSuggestions(r);
        AssertLastSearcherOpen(4);
        spellChecker.Dispose();
        AssertSearchersClosed();

        // LUCENENET NOTE: Per MSDN, calling Dispose() multiple times
        // should be a safe operation. http://stackoverflow.com/a/5306896/181087
        // Certainly, there shouldn't be a problem with calling Dispose() within
        // a using block if you decide to free up resources early.
        //try
        //{
        //    spellChecker.Dispose();
        //    fail("spellchecker was already closed");
        //}
        //catch (ObjectDisposedException e)
        //{
        //    // expected
        //}
        try
        {
            CheckCommonSuggestions(r);
            fail("spellchecker was already closed");
        }
        catch (ObjectDisposedException /*e*/)
        {
            // expected
        }

        try
        {
            spellChecker.ClearIndex();
            fail("spellchecker was already closed");
        }
        catch (ObjectDisposedException /*e*/)
        {
            // expected
        }

        try
        {
            spellChecker.IndexDictionary(new LuceneDictionary(r, field), NewIndexWriterConfig(TEST_VERSION_CURRENT, null), false);
            fail("spellchecker was already closed");
        }
        catch (ObjectDisposedException /*e*/)
        {
            // expected
        }

        try
        {
            spellChecker.SetSpellIndex(spellindex);
            fail("spellchecker was already closed");
        }
        catch (ObjectDisposedException /*e*/)
        {
            // expected
        }
        assertEquals(4, searchers.Count);
        AssertSearchersClosed();
    }
}
public void TestSuggestModes()
{
    using (IndexReader r = DirectoryReader.Open(userindex))
    {
        spellChecker.ClearIndex();
        Addwords(r, spellChecker, "field1");

        {
            string[] similar = spellChecker.SuggestSimilar("eighty", 2, r, "field1", SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
            assertEquals(1, similar.Length);
            assertEquals("eighty", similar[0]);
        }
        {
            string[] similar = spellChecker.SuggestSimilar("eight", 2, r, "field1", SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
            assertEquals(1, similar.Length);
            assertEquals("eight", similar[0]);
        }
        {
            string[] similar = spellChecker.SuggestSimilar("eighty", 5, r, "field1", SuggestMode.SUGGEST_MORE_POPULAR);
            assertEquals(5, similar.Length);
            assertEquals("eight", similar[0]);
        }
        {
            string[] similar = spellChecker.SuggestSimilar("twenty", 5, r, "field1", SuggestMode.SUGGEST_MORE_POPULAR);
            assertEquals(1, similar.Length);
            assertEquals("twenty-one", similar[0]);
        }
        {
            string[] similar = spellChecker.SuggestSimilar("eight", 5, r, "field1", SuggestMode.SUGGEST_MORE_POPULAR);
            assertEquals(0, similar.Length);
        }
        {
            string[] similar = spellChecker.SuggestSimilar("eighty", 5, r, "field1", SuggestMode.SUGGEST_ALWAYS);
            assertEquals(5, similar.Length);
            assertEquals("eight", similar[0]);
        }
        {
            string[] similar = spellChecker.SuggestSimilar("eight", 5, r, "field1", SuggestMode.SUGGEST_ALWAYS);
            assertEquals(5, similar.Length);
            assertEquals("eighty", similar[0]);
        }
    }
}
/// <summary>
/// Execute the search.
/// </summary>
/// <param name="options">Search options</param>
/// <param name="safeSearch">Whether to enable safe (escaped) search</param>
/// <returns></returns>
private ILuceneSearchResultCollection PerformSearch(SearchOptions options, bool safeSearch)
{
    // result collection
    ILuceneSearchResultCollection results = new LuceneSearchResultCollection();
    using var reader = DirectoryReader.Open(_directory);
    var searcher = new IndexSearcher(reader);
    Query query;

    // safe search: escape Lucene query syntax in the keywords
    if (safeSearch)
    {
        options.Keywords = QueryParserBase.Escape(options.Keywords);
    }

    if (options.Fields.Count == 1)
    {
        // single-field search
        var queryParser = new QueryParser(Lucene.Net.Util.LuceneVersion.LUCENE_48, options.Fields[0], _analyzer);
        query = queryParser.Parse(options.Keywords);
    }
    else
    {
        // multi-field search
        var multiFieldQueryParser = new MultiFieldQueryParser(Lucene.Net.Util.LuceneVersion.LUCENE_48, options.Fields.ToArray(), _analyzer, options.Boosts);
        query = GetFuzzyquery(multiFieldQueryParser, options.Keywords);
    }

    var sortFields = new List<SortField> { SortField.FIELD_SCORE };
    sortFields.AddRange(options.OrderBy.Select(sortField => new SortField(sortField, SortFieldType.STRING)));

    // build the sort criteria
    var sort = new Sort(sortFields.ToArray());
    Expression<Func<ScoreDoc, bool>> where = _ => true;
    if (options.Type != null)
    {
        // restrict results to documents of the requested type
        @where = @where.And(m => options.Type.AssemblyQualifiedName == searcher.Doc(m.Doc).Get("Type"));
    }

    var matches = searcher.Search(query, null, options.MaximumNumberOfHits, sort, true, true).ScoreDocs.Where(@where.Compile());
    results.TotalHits = matches.Count();

    // pagination
    if (options.Skip.HasValue)
    {
        matches = matches.Skip(options.Skip.Value);
    }
    if (options.Take.HasValue)
    {
        matches = matches.Take(options.Take.Value);
    }

    var docs = matches.ToList();
    // build the result set
    foreach (var match in docs)
    {
        var doc = searcher.Doc(match.Doc);
        results.Results.Add(new LuceneSearchResult()
        {
            Score = match.Score,
            Document = doc
        });
    }
    return results;
}
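// Hedged usage sketch (not from the original source): how a caller might
// drive PerformSearch. SearchOptions members beyond those referenced above,
// and the exact initializer shape, are assumptions for illustration.
var options = new SearchOptions
{
    Keywords = "lucene full-text search",             // raw user input
    Fields = new List<string> { "Title", "Summary" },
    OrderBy = new List<string>(),                     // relevance only
    MaximumNumberOfHits = 1000,
    Skip = 0,                                         // first page
    Take = 10
};
// safeSearch = true escapes Lucene query syntax, so user input cannot
// inject wildcards or boolean operators into the parsed query.
var results = PerformSearch(options, safeSearch: true);
Console.WriteLine($"{results.TotalHits} hits");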
public virtual void TestSetBufferSize()
{
    var indexDir = CreateTempDir("testSetBufferSize");
    var dir = new MockFSDirectory(indexDir, Random);
    try
    {
        var writer = new IndexWriter(
            dir,
            new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))
                .SetOpenMode(OpenMode.CREATE)
                .SetMergePolicy(NewLogMergePolicy(false)));
        for (int i = 0; i < 37; i++)
        {
            var doc = new Document();
            doc.Add(NewTextField("content", "aaa bbb ccc ddd" + i, Field.Store.YES));
            doc.Add(NewTextField("id", "" + i, Field.Store.YES));
            writer.AddDocument(doc);
        }

        dir.AllIndexInputs.Clear();

        IndexReader reader = DirectoryReader.Open(writer, true);
        var aaa = new Term("content", "aaa");
        var bbb = new Term("content", "bbb");

        reader.Dispose();

        dir.TweakBufferSizes();
        writer.DeleteDocuments(new Term("id", "0"));
        reader = DirectoryReader.Open(writer, true);
        var searcher = NewSearcher(reader);
        var hits = searcher.Search(new TermQuery(bbb), null, 1000).ScoreDocs;
        dir.TweakBufferSizes();
        Assert.AreEqual(36, hits.Length);

        reader.Dispose();

        dir.TweakBufferSizes();
        writer.DeleteDocuments(new Term("id", "4"));
        reader = DirectoryReader.Open(writer, true);
        searcher = NewSearcher(reader);

        hits = searcher.Search(new TermQuery(bbb), null, 1000).ScoreDocs;
        dir.TweakBufferSizes();
        Assert.AreEqual(35, hits.Length);
        dir.TweakBufferSizes();
        hits = searcher.Search(new TermQuery(new Term("id", "33")), null, 1000).ScoreDocs;
        dir.TweakBufferSizes();
        Assert.AreEqual(1, hits.Length);
        hits = searcher.Search(new TermQuery(aaa), null, 1000).ScoreDocs;
        dir.TweakBufferSizes();
        Assert.AreEqual(35, hits.Length);
        writer.Dispose();
        reader.Dispose();
    }
    finally
    {
        indexDir.Delete(true);
    }
}
public static void Main(string[] args)
{
    if (args.Length < 4 || args.Length > 5)
    {
        // LUCENENET specific - our wrapper console shows correct usage
        throw new ArgumentException();
        //Console.Error.WriteLine("Usage: QueryDriver <topicsFile> <qrelsFile> <submissionFile> <indexDir> [querySpec]");
        //Console.Error.WriteLine("topicsFile: input file containing queries");
        //Console.Error.WriteLine("qrelsFile: input file containing relevance judgements");
        //Console.Error.WriteLine("submissionFile: output submission file for trec_eval");
        //Console.Error.WriteLine("indexDir: index directory");
        //Console.Error.WriteLine("querySpec: string composed of fields to use in query consisting of T=title,D=description,N=narrative:");
        //Console.Error.WriteLine("\texample: TD (query on Title + Description). The default is T (title only)");
        //Environment.Exit(1);
    }

    FileInfo topicsFile = new FileInfo(args[0]);
    FileInfo qrelsFile = new FileInfo(args[1]);
    SubmissionReport submitLog = new SubmissionReport(new StreamWriter(new FileStream(args[2], FileMode.Create, FileAccess.Write), Encoding.UTF8 /* huh, no nio.Charset ctor? */), "lucene");
    using (Store.FSDirectory dir = Store.FSDirectory.Open(new DirectoryInfo(args[3])))
    using (IndexReader reader = DirectoryReader.Open(dir))
    {
        string fieldSpec = args.Length == 5 ? args[4] : "T"; // default to Title-only if not specified.
        IndexSearcher searcher = new IndexSearcher(reader);

        int maxResults = 1000;
        string docNameField = "docname";

        TextWriter logger = Console.Out; //new StreamWriter(Console, Encoding.GetEncoding(0));

        // use trec utilities to read trec topics into quality queries
        TrecTopicsReader qReader = new TrecTopicsReader();
        QualityQuery[] qqs = qReader.ReadQueries(IOUtils.GetDecodingReader(topicsFile, Encoding.UTF8));

        // prepare judge, with trec utilities that read from a QRels file
        IJudge judge = new TrecJudge(IOUtils.GetDecodingReader(qrelsFile, Encoding.UTF8));

        // validate topics & judgments match each other
        judge.ValidateData(qqs, logger);

        ISet<string> fieldSet = new JCG.HashSet<string>();
        if (fieldSpec.IndexOf('T') >= 0)
        {
            fieldSet.Add("title");
        }
        if (fieldSpec.IndexOf('D') >= 0)
        {
            fieldSet.Add("description");
        }
        if (fieldSpec.IndexOf('N') >= 0)
        {
            fieldSet.Add("narrative");
        }

        // set the parsing of quality queries into Lucene queries.
        IQualityQueryParser qqParser = new SimpleQQParser(fieldSet.ToArray(), "body");

        // run the benchmark
        QualityBenchmark qrun = new QualityBenchmark(qqs, qqParser, searcher, docNameField);
        qrun.MaxResults = maxResults;
        QualityStats[] stats = qrun.Execute(judge, submitLog, logger);

        // print an average sum of the results
        QualityStats avg = QualityStats.Average(stats);
        avg.Log("SUMMARY", 2, logger, "  ");
    }
}
public List<ResultData> GetResults(Query query, List<RequiredHighlight> requiredHighlights)
{
    try
    {
        Directory indexDir = new SimpleFSDirectory(ConfigurationManager.IndexDir);
        DirectoryReader reader = DirectoryReader.Open(indexDir);
        IndexSearcher isr = new IndexSearcher(reader);
        TopScoreDocCollector collector = TopScoreDocCollector.Create(10, true);
        List<ResultData> results = new List<ResultData>();
        isr.Search(query, collector);
        ScoreDoc[] hits = collector.GetTopDocs().ScoreDocs;
        ResultData re;
        Document doc;
        Highlighter hi;
        foreach (ScoreDoc sd in hits)
        {
            doc = isr.Doc(sd.Doc);
            string[] allKeywords = doc.GetValues("keyword");
            string keywords = "";
            foreach (string keyword in allKeywords)
            {
                keywords += keyword.Trim() + " ";
            }
            keywords = keywords.Trim();
            string title = doc.Get("title");
            string location = doc.Get("filename");
            string author = doc.Get("author");
            int category = Int32.Parse(doc.Get("category"));
            string language = doc.Get("language");
            string highlight = "";
            string text = GetDocumentText(location);
            foreach (var item in requiredHighlights)
            {
                hi = new Highlighter(new QueryScorer(query, reader, item.FieldName));
                try
                {
                    highlight += hi.GetBestFragment(analyzer, item.FieldName, text);
                }
                catch (Exception e)
                {
                    Console.WriteLine(e.Message + " on " + e.StackTrace);
                }
            }
            re = new ResultData()
            {
                Title = title,
                Filename = location,
                Keywords = keywords,
                CategoryId = category,
                Author = author,
                Highlight = highlight,
            };
            results.Add(re);
        }
        reader.Dispose();
        return results;
    }
    catch (Exception e)
    {
        Console.WriteLine(e.Source);
        return new List<ResultData>();
    }
}
public void TestMVGroupedFacetingWithDeletes()
{
    string groupField = "hotel";
    FieldType customType = new FieldType();
    customType.IsStored = true;

    Directory dir = NewDirectory();
    RandomIndexWriter w = new RandomIndexWriter(
        Random(), dir,
        NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NoMergePolicy.COMPOUND_FILES));
    bool useDv = false;

    // Cannot assert this since we use NoMergePolicy:
    w.DoRandomForceMergeAssert = false;

    // 0
    Document doc = new Document();
    doc.Add(new StringField("x", "x", Field.Store.NO));
    w.AddDocument(doc);

    // 1
    doc = new Document();
    AddField(doc, groupField, "a", useDv);
    doc.Add(new StringField("airport", "ams", Field.Store.NO));
    w.AddDocument(doc);

    w.Commit();
    w.DeleteDocuments(new TermQuery(new Term("airport", "ams")));

    // 2
    doc = new Document();
    AddField(doc, groupField, "a", useDv);
    doc.Add(new StringField("airport", "ams", Field.Store.NO));
    w.AddDocument(doc);

    // 3
    doc = new Document();
    AddField(doc, groupField, "a", useDv);
    doc.Add(new StringField("airport", "dus", Field.Store.NO));
    w.AddDocument(doc);

    // 4
    doc = new Document();
    AddField(doc, groupField, "b", useDv);
    doc.Add(new StringField("airport", "ams", Field.Store.NO));
    w.AddDocument(doc);

    // 5
    doc = new Document();
    AddField(doc, groupField, "b", useDv);
    doc.Add(new StringField("airport", "ams", Field.Store.NO));
    w.AddDocument(doc);

    // 6
    doc = new Document();
    AddField(doc, groupField, "b", useDv);
    doc.Add(new StringField("airport", "ams", Field.Store.NO));
    w.AddDocument(doc);
    w.Commit();

    // 7
    doc = new Document();
    doc.Add(new StringField("x", "x", Field.Store.NO));
    w.AddDocument(doc);
    w.Commit();
    w.Dispose();

    IndexSearcher indexSearcher = NewSearcher(DirectoryReader.Open(dir));
    AbstractGroupFacetCollector groupedAirportFacetCollector = CreateRandomCollector(groupField, "airport", null, true);
    indexSearcher.Search(new MatchAllDocsQuery(), groupedAirportFacetCollector);
    TermGroupFacetCollector.GroupedFacetResult airportResult = groupedAirportFacetCollector.MergeSegmentResults(10, 0, false);
    assertEquals(3, airportResult.TotalCount);
    assertEquals(1, airportResult.TotalMissingCount);

    IList<TermGroupFacetCollector.FacetEntry> entries = airportResult.GetFacetEntries(0, 10);
    assertEquals(2, entries.Count);
    assertEquals("ams", entries[0].Value.Utf8ToString());
    assertEquals(2, entries[0].Count);
    assertEquals("dus", entries[1].Value.Utf8ToString());
    assertEquals(1, entries[1].Count);

    indexSearcher.IndexReader.Dispose();
    dir.Dispose();
}
public virtual void TestPerFieldCodec()
{
    int NUM_DOCS = AtLeast(173);
    if (Verbose)
    {
        Console.WriteLine("TEST: NUM_DOCS=" + NUM_DOCS);
    }

    using BaseDirectoryWrapper dir = NewDirectory();
    dir.CheckIndexOnDispose = false; // we use a custom codec provider
    using IndexWriter w = new IndexWriter(dir,
        NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetCodec(new CustomPerFieldCodec()).SetMergePolicy(NewLogMergePolicy(3)));
    Documents.Document doc = new Documents.Document();
    // uses default codec:
    doc.Add(NewTextField("field1", "this field uses the standard codec as the test", Field.Store.NO));
    // uses pulsing codec:
    Field field2 = NewTextField("field2", "this field uses the pulsing codec as the test", Field.Store.NO);
    doc.Add(field2);

    Field idField = NewStringField("id", "", Field.Store.NO);

    doc.Add(idField);
    for (int i = 0; i < NUM_DOCS; i++)
    {
        idField.SetStringValue("" + i);
        w.AddDocument(doc);
        if ((i + 1) % 10 == 0)
        {
            w.Commit();
        }
    }
    if (Verbose)
    {
        Console.WriteLine("TEST: now delete id=77");
    }
    w.DeleteDocuments(new Term("id", "77"));

    using (IndexReader r = DirectoryReader.Open(w, true))
    {
        Assert.AreEqual(NUM_DOCS - 1, r.NumDocs);
        IndexSearcher s = NewSearcher(r);
        Assert.AreEqual(NUM_DOCS - 1, s.Search(new TermQuery(new Term("field1", "standard")), 1).TotalHits);
        Assert.AreEqual(NUM_DOCS - 1, s.Search(new TermQuery(new Term("field2", "pulsing")), 1).TotalHits);
    }

    if (Verbose)
    {
        Console.WriteLine("\nTEST: now delete 2nd doc");
    }
    w.DeleteDocuments(new Term("id", "44"));

    if (Verbose)
    {
        Console.WriteLine("\nTEST: now force merge");
    }
    w.ForceMerge(1);

    if (Verbose)
    {
        Console.WriteLine("\nTEST: now open reader");
    }
    using (IndexReader r = DirectoryReader.Open(w, true))
    {
        Assert.AreEqual(NUM_DOCS - 2, r.MaxDoc);
        Assert.AreEqual(NUM_DOCS - 2, r.NumDocs);
        IndexSearcher s = NewSearcher(r);
        Assert.AreEqual(NUM_DOCS - 2, s.Search(new TermQuery(new Term("field1", "standard")), 1).TotalHits);
        Assert.AreEqual(NUM_DOCS - 2, s.Search(new TermQuery(new Term("field2", "pulsing")), 1).TotalHits);
        Assert.AreEqual(1, s.Search(new TermQuery(new Term("id", "76")), 1).TotalHits);
        Assert.AreEqual(0, s.Search(new TermQuery(new Term("id", "77")), 1).TotalHits);
        Assert.AreEqual(0, s.Search(new TermQuery(new Term("id", "44")), 1).TotalHits);

        if (Verbose)
        {
            Console.WriteLine("\nTEST: now close NRT reader");
        }
    }
}
private Lucene.Net.Index.IndexReader CreateReader()
{
    return DirectoryReader.Open(((LuceneIndex)_searchIndex).Directory, true);
}
public void TestBreakingWords()
{
    IndexReader ir = null;
    try
    {
        ir = DirectoryReader.Open(dir);
        WordBreakSpellChecker wbsp = new WordBreakSpellChecker();
        {
            Term term = new Term("numbers", "ninetynine");
            wbsp.MaxChanges = 1;
            wbsp.MinBreakWordLength = 1;
            wbsp.MinSuggestionFrequency = 1;
            SuggestWord[][] sw = wbsp.SuggestWordBreaks(term, 5, ir, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX, WordBreakSpellChecker.BreakSuggestionSortMethod.NUM_CHANGES_THEN_MAX_FREQUENCY);
            assertTrue(sw.Length == 1);
            assertTrue(sw[0].Length == 2);
            assertTrue(sw[0][0].String.equals("ninety"));
            assertTrue(sw[0][1].String.equals("nine"));
            assertTrue(sw[0][0].Score == 1);
            assertTrue(sw[0][1].Score == 1);
        }
        {
            Term term = new Term("numbers", "onethousand");
            wbsp.MaxChanges = 1;
            wbsp.MinBreakWordLength = 1;
            wbsp.MinSuggestionFrequency = 1;
            SuggestWord[][] sw = wbsp.SuggestWordBreaks(term, 2, ir, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX, WordBreakSpellChecker.BreakSuggestionSortMethod.NUM_CHANGES_THEN_MAX_FREQUENCY);
            assertTrue(sw.Length == 1);
            assertTrue(sw[0].Length == 2);
            assertTrue(sw[0][0].String.equals("one"));
            assertTrue(sw[0][1].String.equals("thousand"));
            assertTrue(sw[0][0].Score == 1);
            assertTrue(sw[0][1].Score == 1);

            wbsp.MaxChanges = 2;
            wbsp.MinSuggestionFrequency = 1;
            sw = wbsp.SuggestWordBreaks(term, 1, ir, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX, WordBreakSpellChecker.BreakSuggestionSortMethod.NUM_CHANGES_THEN_MAX_FREQUENCY);
            assertTrue(sw.Length == 1);
            assertTrue(sw[0].Length == 2);

            wbsp.MaxChanges = 2;
            wbsp.MinSuggestionFrequency = 2;
            sw = wbsp.SuggestWordBreaks(term, 2, ir, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX, WordBreakSpellChecker.BreakSuggestionSortMethod.NUM_CHANGES_THEN_MAX_FREQUENCY);
            assertTrue(sw.Length == 1);
            assertTrue(sw[0].Length == 2);

            wbsp.MaxChanges = 2;
            wbsp.MinSuggestionFrequency = 1;
            sw = wbsp.SuggestWordBreaks(term, 2, ir, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX, WordBreakSpellChecker.BreakSuggestionSortMethod.NUM_CHANGES_THEN_MAX_FREQUENCY);
            assertTrue(sw.Length == 2);
            assertTrue(sw[0].Length == 2);
            assertTrue(sw[0][0].String.equals("one"));
            assertTrue(sw[0][1].String.equals("thousand"));
            assertTrue(sw[0][0].Score == 1);
            assertTrue(sw[0][1].Score == 1);
            assertTrue(sw[0][1].Freq > 1);
            assertTrue(sw[0][0].Freq > sw[0][1].Freq);
            assertTrue(sw[1].Length == 3);
            assertTrue(sw[1][0].String.equals("one"));
            assertTrue(sw[1][1].String.equals("thou"));
            assertTrue(sw[1][2].String.equals("sand"));
            assertTrue(sw[1][0].Score == 2);
            assertTrue(sw[1][1].Score == 2);
            assertTrue(sw[1][2].Score == 2);
            assertTrue(sw[1][0].Freq > 1);
            assertTrue(sw[1][1].Freq == 1);
            assertTrue(sw[1][2].Freq == 1);
        }
        {
            Term term = new Term("numbers", "onethousandonehundredeleven");
            wbsp.MaxChanges = 3;
            wbsp.MinBreakWordLength = 1;
            wbsp.MinSuggestionFrequency = 1;
            SuggestWord[][] sw = wbsp.SuggestWordBreaks(term, 5, ir, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX, WordBreakSpellChecker.BreakSuggestionSortMethod.NUM_CHANGES_THEN_MAX_FREQUENCY);
            assertTrue(sw.Length == 0);

            wbsp.MaxChanges = 4;
            sw = wbsp.SuggestWordBreaks(term, 5, ir, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX, WordBreakSpellChecker.BreakSuggestionSortMethod.NUM_CHANGES_THEN_MAX_FREQUENCY);
            assertTrue(sw.Length == 1);
            assertTrue(sw[0].Length == 5);

            wbsp.MaxChanges = 5;
            sw = wbsp.SuggestWordBreaks(term, 5, ir, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX, WordBreakSpellChecker.BreakSuggestionSortMethod.NUM_CHANGES_THEN_MAX_FREQUENCY);
            assertTrue(sw.Length == 2);
            assertTrue(sw[0].Length == 5);
            assertTrue(sw[0][1].String.equals("thousand"));
            assertTrue(sw[1].Length == 6);
            assertTrue(sw[1][1].String.equals("thou"));
            assertTrue(sw[1][2].String.equals("sand"));
        }
        {
            // make sure we can handle 2-char codepoints
            Term term = new Term("numbers", "\uD864\uDC79");
            wbsp.MaxChanges = 1;
            wbsp.MinBreakWordLength = 1;
            wbsp.MinSuggestionFrequency = 1;
            SuggestWord[][] sw = wbsp.SuggestWordBreaks(term, 5, ir, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX, WordBreakSpellChecker.BreakSuggestionSortMethod.NUM_CHANGES_THEN_MAX_FREQUENCY);
            assertTrue(sw.Length == 0);
        }
    }
    finally
    {
        // Dispose quietly; ir may be null if Open failed.
        try { ir?.Dispose(); } catch { }
    }
}
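The WordBreakSpellChecker tests above read from a shared `dir` fixture that is not shown here. A minimal sketch of what that setup plausibly looks like, with the field name and contents inferred from the assertions (the real fixture may index different documents and frequencies):

// Hypothetical fixture: index number words into the "numbers" field so that
// terms like "ninety", "nine", "one", "thousand", "thou", and "sand" exist.
private static Directory dir;

private static void BuildNumbersIndex()
{
    dir = new RAMDirectory();
    var config = new IndexWriterConfig(LuceneVersion.LUCENE_48,
        new WhitespaceAnalyzer(LuceneVersion.LUCENE_48));
    using (var writer = new IndexWriter(dir, config))
    {
        foreach (var line in new[]
        {
            "one one one thousand thousand",
            "ninety nine eighty eight hundred",
            "thou sand hun dred eleven",
        })
        {
            var doc = new Document();
            doc.Add(new TextField("numbers", line, Field.Store.NO));
            writer.AddDocument(doc);
        }
    }
}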
public static List<int> Search(string key, int page, int pageSize, out int totalHits)
{
    totalHits = 0;
    if (string.IsNullOrEmpty(key))
    {
        return null;
    }
    key = key.Trim().ToLower();
    var rs = new List<int>();
    try
    {
        var indexDir = Path.Combine(System.IO.Directory.GetCurrentDirectory(), "lucene");
        if (System.IO.Directory.Exists(indexDir))
        {
            var directory = FSDirectory.Open(new DirectoryInfo(indexDir), NoLockFactory.GetNoLockFactory());
            using (var reader = DirectoryReader.Open(directory))
            {
                var searcher = new IndexSearcher(reader);
                var booleanQuery = new BooleanQuery();
                var list = CutKeyWord(key);
                foreach (var word in list)
                {
                    booleanQuery.Add(new TermQuery(new Term("title", word)), Occur.SHOULD);
                    booleanQuery.Add(new TermQuery(new Term("summary", word)), Occur.SHOULD);
                }
                var collector = TopScoreDocCollector.Create(1000, true);
                searcher.Search(booleanQuery, null, collector);
                // GetTopDocs(start, howMany): fetch exactly one page of results.
                var docs = collector.GetTopDocs((page - 1) * pageSize, pageSize).ScoreDocs;
                totalHits = collector.TotalHits;
                foreach (var d in docs)
                {
                    var document = searcher.Doc(d.Doc); // fetch the stored document
                    var articleId = document.Get("articleId");
                    if (int.TryParse(articleId, out int mid))
                    {
                        rs.Add(mid);
                    }
                }
            }
        }
    }
    catch (Exception ex)
    {
        Console.WriteLine($"SearchMerchs ex={ex}");
    }
    return rs;
}
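The method above opens a fresh reader on every call, which is expensive if it runs per request. Lucene.NET's SearcherManager is the usual fix; a sketch under the assumption of the same "lucene" index path (the CountHits helper and its query are illustrative only):

// Hypothetical long-lived manager: share one SearcherManager per index
// instead of opening a DirectoryReader per search.
private static readonly FSDirectory _dir =
    FSDirectory.Open(new DirectoryInfo(Path.Combine(System.IO.Directory.GetCurrentDirectory(), "lucene")));
private static readonly SearcherManager _manager =
    new SearcherManager(_dir, new SearcherFactory());

public static int CountHits(Query query)
{
    _manager.MaybeRefresh();          // pick up index changes, if any
    IndexSearcher searcher = _manager.Acquire();
    try
    {
        return searcher.Search(query, 1).TotalHits;
    }
    finally
    {
        _manager.Release(searcher);   // never dispose an acquired searcher directly
    }
}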
public void GRandom()
{
    int numDocs = TestUtil.NextInt(Random(), 10 * RANDOM_MULTIPLIER, 100 * RANDOM_MULTIPLIER);
    Directory dir = null;
    RandomIndexWriter writer = null;
    IndexReader ir = null;
    try
    {
        dir = NewDirectory();
        writer = new RandomIndexWriter(Random(), dir, new MockAnalyzer(Random(), MockTokenizer.WHITESPACE, false), Similarity, TimeZone);
        int maxLength = TestUtil.NextInt(Random(), 5, 50);
        List<string> originals = new List<string>(numDocs);
        List<string[]> breaks = new List<string[]>(numDocs);
        for (int i = 0; i < numDocs; i++)
        {
            string orig = "";
            if (Random().nextBoolean())
            {
                while (!GoodTestString(orig))
                {
                    orig = TestUtil.RandomSimpleString(Random(), maxLength);
                }
            }
            else
            {
                while (!GoodTestString(orig))
                {
                    orig = TestUtil.RandomUnicodeString(Random(), maxLength);
                }
            }
            originals.Add(orig);
            // Pick a break point between two codepoints so surrogate pairs are never split.
            int totalLength = orig.CodePointCount(0, orig.Length);
            int breakAt = orig.OffsetByCodePoints(0, TestUtil.NextInt(Random(), 1, totalLength - 1));
            string[] broken = new string[2];
            broken[0] = orig.Substring(0, breakAt);
            broken[1] = orig.Substring(breakAt);
            breaks.Add(broken);
            Document doc = new Document();
            doc.Add(NewTextField("random_break", broken[0] + " " + broken[1], Field.Store.NO));
            doc.Add(NewTextField("random_combine", orig, Field.Store.NO));
            writer.AddDocument(doc);
        }
        writer.Commit();
        writer.Dispose();
        ir = DirectoryReader.Open(dir);
        WordBreakSpellChecker wbsp = new WordBreakSpellChecker();
        wbsp.MaxChanges = 1;
        wbsp.MinBreakWordLength = 1;
        wbsp.MinSuggestionFrequency = 1;
        wbsp.MaxCombineWordLength = maxLength;
        for (int i = 0; i < originals.size(); i++)
        {
            string orig = originals[i];
            string left = breaks[i][0];
            string right = breaks[i][1];
            {
                Term term = new Term("random_break", orig);
                SuggestWord[][] sw = wbsp.SuggestWordBreaks(term, originals.size(), ir, SuggestMode.SUGGEST_ALWAYS, WordBreakSpellChecker.BreakSuggestionSortMethod.NUM_CHANGES_THEN_MAX_FREQUENCY);
                bool failed = true;
                foreach (SuggestWord[] sw1 in sw)
                {
                    assertTrue(sw1.Length == 2);
                    if (sw1[0].String.equals(left) && sw1[1].String.equals(right))
                    {
                        failed = false;
                    }
                }
                assertFalse("Failed getting break suggestions\n >Original: " + orig + "\n >Left: " + left + "\n >Right: " + right, failed);
            }
            {
                Term[] terms = { new Term("random_combine", left), new Term("random_combine", right) };
                CombineSuggestion[] cs = wbsp.SuggestWordCombinations(terms, originals.size(), ir, SuggestMode.SUGGEST_ALWAYS);
                bool failed = true;
                foreach (CombineSuggestion cs1 in cs)
                {
                    assertTrue(cs1.OriginalTermIndexes.Length == 2);
                    if (cs1.Suggestion.String.equals(left + right))
                    {
                        failed = false;
                    }
                }
                assertFalse("Failed getting combine suggestions\n >Original: " + orig + "\n >Left: " + left + "\n >Right: " + right, failed);
            }
        }
    }
    finally
    {
        // Dispose quietly; any of these may be null if setup failed part-way.
        try { ir?.Dispose(); } catch { }
        try { writer?.Dispose(); } catch { }
        try { dir?.Dispose(); } catch { }
    }
}
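GRandom depends on a GoodTestString predicate that is not shown. The assertions only require strings that can be split between two codepoints and survive whitespace tokenization intact, so a plausible (assumed) version is:

// Hypothetical predicate, inferred from how GRandom uses it; the real helper
// may impose extra conditions. Requires System.Linq, and reuses the same
// CodePointCount extension the test above calls.
private static bool GoodTestString(string s)
{
    return s.Length > 0
        && s.CodePointCount(0, s.Length) >= 2   // a break point must exist
        && !s.Any(char.IsWhiteSpace);           // whitespace would change tokenization
}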
public void TestReadAndWrite()
{
    var connectionString = _connectionString ?? "UseDevelopmentStorage=true";
    var cloudStorageAccount = CloudStorageAccount.Parse(connectionString);
    const string containerName = "testcatalog";
    var blobClient = cloudStorageAccount.CreateCloudBlobClient();
    var container = blobClient.GetContainerReference(containerName);
    container.DeleteIfExists();
    var azureDirectory = new AzureDirectory(cloudStorageAccount, containerName);
    var indexWriterConfig = new IndexWriterConfig(
        Lucene.Net.Util.LuceneVersion.LUCENE_48,
        new StandardAnalyzer(Lucene.Net.Util.LuceneVersion.LUCENE_48));
    int dog = 0, cat = 0, car = 0;
    using (var indexWriter = new IndexWriter(azureDirectory, indexWriterConfig))
    {
        for (var iDoc = 0; iDoc < 1000; iDoc++)
        {
            var bodyText = GeneratePhrase(40);
            var doc = new Document
            {
                new TextField("id", DateTime.Now.ToFileTimeUtc() + "-" + iDoc, Field.Store.YES),
                new TextField("Title", GeneratePhrase(10), Field.Store.YES),
                new TextField("Body", bodyText, Field.Store.YES)
            };
            dog += bodyText.Contains(" dog ") ? 1 : 0;
            cat += bodyText.Contains(" cat ") ? 1 : 0;
            car += bodyText.Contains(" car ") ? 1 : 0;
            indexWriter.AddDocument(doc);
        }
        Console.WriteLine("Total docs is {0}, {1} dog, {2} cat, {3} car", indexWriter.NumDocs, dog, cat, car);
    }
    try
    {
        var ireader = DirectoryReader.Open(azureDirectory);
        for (var i = 0; i < 100; i++)
        {
            var searcher = new IndexSearcher(ireader);
            var searchForPhrase = SearchForPhrase(searcher, "dog");
            Assert.AreEqual(dog, searchForPhrase);
            searchForPhrase = SearchForPhrase(searcher, "cat");
            Assert.AreEqual(cat, searchForPhrase);
            searchForPhrase = SearchForPhrase(searcher, "car");
            Assert.AreEqual(car, searchForPhrase);
        }
        Console.WriteLine("Tests passed");
    }
    catch (Exception x)
    {
        Console.WriteLine("Tests failed:\n{0}", x);
    }
    finally
    {
        // Check the container exists, then delete it.
        Assert.IsTrue(container.Exists());
        container.Delete();
    }
}
public void TestCombiningWords()
{
    IndexReader ir = null;
    try
    {
        ir = DirectoryReader.Open(dir);
        WordBreakSpellChecker wbsp = new WordBreakSpellChecker();
        {
            Term[] terms =
            {
                new Term("numbers", "one"),
                new Term("numbers", "hun"),
                new Term("numbers", "dred"),
                new Term("numbers", "eight"),
                new Term("numbers", "y"),
                new Term("numbers", "eight"),
            };
            wbsp.MaxChanges = 3;
            wbsp.MaxCombineWordLength = 20;
            wbsp.MinSuggestionFrequency = 1;
            CombineSuggestion[] cs = wbsp.SuggestWordCombinations(terms, 10, ir, SuggestMode.SUGGEST_ALWAYS);
            assertTrue(cs.Length == 5);
            assertTrue(cs[0].OriginalTermIndexes.Length == 2);
            assertTrue(cs[0].OriginalTermIndexes[0] == 1);
            assertTrue(cs[0].OriginalTermIndexes[1] == 2);
            assertTrue(cs[0].Suggestion.String.equals("hundred"));
            assertTrue(cs[0].Suggestion.Score == 1);
            assertTrue(cs[1].OriginalTermIndexes.Length == 2);
            assertTrue(cs[1].OriginalTermIndexes[0] == 3);
            assertTrue(cs[1].OriginalTermIndexes[1] == 4);
            assertTrue(cs[1].Suggestion.String.equals("eighty"));
            assertTrue(cs[1].Suggestion.Score == 1);
            assertTrue(cs[2].OriginalTermIndexes.Length == 2);
            assertTrue(cs[2].OriginalTermIndexes[0] == 4);
            assertTrue(cs[2].OriginalTermIndexes[1] == 5);
            assertTrue(cs[2].Suggestion.String.equals("yeight"));
            assertTrue(cs[2].Suggestion.Score == 1);
            for (int i = 3; i < 5; i++)
            {
                assertTrue(cs[i].OriginalTermIndexes.Length == 3);
                assertTrue(cs[i].Suggestion.Score == 2);
                assertTrue(
                    (cs[i].OriginalTermIndexes[0] == 1 && cs[i].OriginalTermIndexes[1] == 2 && cs[i].OriginalTermIndexes[2] == 3 && cs[i].Suggestion.String.equals("hundredeight")) ||
                    (cs[i].OriginalTermIndexes[0] == 3 && cs[i].OriginalTermIndexes[1] == 4 && cs[i].OriginalTermIndexes[2] == 5 && cs[i].Suggestion.String.equals("eightyeight")));
            }
            cs = wbsp.SuggestWordCombinations(terms, 5, ir, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
            assertTrue(cs.Length == 2);
            assertTrue(cs[0].OriginalTermIndexes.Length == 2);
            assertTrue(cs[0].Suggestion.Score == 1);
            assertTrue(cs[0].OriginalTermIndexes[0] == 1);
            assertTrue(cs[0].OriginalTermIndexes[1] == 2);
            assertTrue(cs[0].Suggestion.String.equals("hundred"));
            assertTrue(cs[0].Suggestion.Score == 1);
            assertTrue(cs[1].OriginalTermIndexes.Length == 3);
            assertTrue(cs[1].Suggestion.Score == 2);
            assertTrue(cs[1].OriginalTermIndexes[0] == 1);
            assertTrue(cs[1].OriginalTermIndexes[1] == 2);
            assertTrue(cs[1].OriginalTermIndexes[2] == 3);
            assertTrue(cs[1].Suggestion.String.equals("hundredeight"));
        }
    }
    finally
    {
        // Dispose quietly; ir may be null if Open failed.
        try { ir?.Dispose(); } catch { }
    }
}
public void TestWithDeletions()
{
    Directory dir = NewDirectory();
    IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));
    iwc.SetMergePolicy(NewLogMergePolicy());
    RandomIndexWriter writer = new RandomIndexWriter(Random, dir, iwc);
    IDictionary<string, Document> docs = GenerateIndexDocuments(AtLeast(100));
    Random rand = Random;
    List<string> termsToDel = new List<string>();
    foreach (Document doc in docs.Values)
    {
        if (rand.nextBoolean() && termsToDel.size() < docs.size() - 1)
        {
            termsToDel.Add(doc.Get(FIELD_NAME));
        }
        writer.AddDocument(doc);
    }
    writer.Commit();
    Term[] delTerms = new Term[termsToDel.size()];
    for (int i = 0; i < termsToDel.size(); i++)
    {
        delTerms[i] = new Term(FIELD_NAME, termsToDel[i]);
    }
    foreach (Term delTerm in delTerms)
    {
        writer.DeleteDocuments(delTerm);
    }
    writer.Commit();
    writer.Dispose();
    foreach (string termToDel in termsToDel)
    {
        var toDel = docs[termToDel];
        docs.Remove(termToDel);
        assertTrue(null != toDel);
    }
    IndexReader ir = DirectoryReader.Open(dir);
    assertTrue("NumDocs should be > 0 but was " + ir.NumDocs, ir.NumDocs > 0);
    assertEquals(ir.NumDocs, docs.size());
    ValueSource[] toAdd = new ValueSource[]
    {
        new Int64FieldSource(WEIGHT_FIELD_NAME_1),
        new Int64FieldSource(WEIGHT_FIELD_NAME_2)
    };
    IDictionary dictionary = new DocumentValueSourceDictionary(ir, FIELD_NAME, new SumSingleFunction(toAdd), PAYLOAD_FIELD_NAME);
    IInputEnumerator inputIterator = dictionary.GetEntryEnumerator();
    while (inputIterator.MoveNext())
    {
        string field = inputIterator.Current.Utf8ToString();
        Document doc = docs[field];
        docs.Remove(field);
        long w1 = doc.GetField(WEIGHT_FIELD_NAME_1).GetInt64ValueOrDefault();
        long w2 = doc.GetField(WEIGHT_FIELD_NAME_2).GetInt64ValueOrDefault();
        assertTrue(inputIterator.Current.equals(new BytesRef(doc.Get(FIELD_NAME))));
        assertEquals(inputIterator.Weight, w2 + w1);
        assertTrue(inputIterator.Payload.equals(doc.GetField(PAYLOAD_FIELD_NAME).GetBinaryValue()));
    }
    assertTrue(docs.Count == 0);
    ir.Dispose();
    dir.Dispose();
}
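TestWithDeletions relies on a GenerateIndexDocuments helper and several field-name constants defined elsewhere in its fixture. A sketch of the document shape the assertions imply (field names are the fixture's constants; keys, weights, and payloads are assumed values):

// Hypothetical fixture: each document needs a unique stored FIELD_NAME term,
// two stored numeric weight fields, and a stored binary payload field.
private IDictionary<string, Document> GenerateIndexDocuments(int count)
{
    var docs = new Dictionary<string, Document>();
    for (int i = 0; i < count; i++)
    {
        string key = "field_" + i;
        var doc = new Document();
        doc.Add(new TextField(FIELD_NAME, key, Field.Store.YES));
        doc.Add(new Int64Field(WEIGHT_FIELD_NAME_1, 10 + i, Field.Store.YES));
        doc.Add(new Int64Field(WEIGHT_FIELD_NAME_2, 20 + i, Field.Store.YES));
        doc.Add(new StoredField(PAYLOAD_FIELD_NAME, new BytesRef("payload_" + i)));
        docs[key] = doc;
    }
    return docs;
}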
public override void BeforeClass()
{
    base.BeforeClass();
    Dir = NewDirectory();
    Sdir1 = NewDirectory();
    Sdir2 = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(Random(), Dir, new MockAnalyzer(Random()), Similarity, TimeZone);
    RandomIndexWriter swriter1 = new RandomIndexWriter(Random(), Sdir1, new MockAnalyzer(Random()), Similarity, TimeZone);
    RandomIndexWriter swriter2 = new RandomIndexWriter(Random(), Sdir2, new MockAnalyzer(Random()), Similarity, TimeZone);
    for (int i = 0; i < 10; i++)
    {
        Document doc = new Document();
        doc.Add(NewStringField("data", Convert.ToString(i), Field.Store.NO));
        writer.AddDocument(doc);
        ((i % 2 == 0) ? swriter1 : swriter2).AddDocument(doc);
    }
    writer.ForceMerge(1);
    swriter1.ForceMerge(1);
    swriter2.ForceMerge(1);
    writer.Dispose();
    swriter1.Dispose();
    swriter2.Dispose();
    Reader = DirectoryReader.Open(Dir);
    Searcher = NewSearcher(Reader);
    MultiReader = new MultiReader(new IndexReader[] { DirectoryReader.Open(Sdir1), DirectoryReader.Open(Sdir2) }, true);
    MultiSearcher = NewSearcher(MultiReader);
    MultiReaderDupls = new MultiReader(new IndexReader[] { DirectoryReader.Open(Sdir1), DirectoryReader.Open(Dir) }, true);
    MultiSearcherDupls = NewSearcher(MultiReaderDupls);
}
public virtual void TestNRTAndCommit()
{
    Directory dir = NewDirectory();
    NRTCachingDirectory cachedDir = new NRTCachingDirectory(dir, 2.0, 25.0);
    MockAnalyzer analyzer = new MockAnalyzer(Random());
    analyzer.MaxTokenLength = TestUtil.NextInt(Random(), 1, IndexWriter.MAX_TERM_LENGTH);
    IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
    RandomIndexWriter w = new RandomIndexWriter(Random(), cachedDir, conf);
    LineFileDocs docs = new LineFileDocs(Random(), DefaultCodecSupportsDocValues());
    int numDocs = TestUtil.NextInt(Random(), 100, 400);
    if (VERBOSE)
    {
        Console.WriteLine("TEST: numDocs=" + numDocs);
    }
    IList<BytesRef> ids = new List<BytesRef>();
    DirectoryReader r = null;
    for (int docCount = 0; docCount < numDocs; docCount++)
    {
        Document doc = docs.NextDoc();
        ids.Add(new BytesRef(doc.Get("docid")));
        w.AddDocument(doc);
        if (Random().Next(20) == 17)
        {
            if (r == null)
            {
                r = DirectoryReader.Open(w.w, false);
            }
            else
            {
                DirectoryReader r2 = DirectoryReader.OpenIfChanged(r);
                if (r2 != null)
                {
                    r.Dispose();
                    r = r2;
                }
            }
            Assert.AreEqual(1 + docCount, r.NumDocs);
            IndexSearcher s = NewSearcher(r);
            // Just make sure search can run; we can't assert
            // totHits since it could be 0
            TopDocs hits = s.Search(new TermQuery(new Term("body", "the")), 10);
            // Console.WriteLine("tot hits " + hits.TotalHits);
        }
    }
    if (r != null)
    {
        r.Dispose();
    }
    // Close should force cache to clear since all files are sync'd
    w.Dispose();
    string[] cachedFiles = cachedDir.ListCachedFiles();
    foreach (string file in cachedFiles)
    {
        Console.WriteLine("FAIL: cached file " + file + " remains after sync");
    }
    Assert.AreEqual(0, cachedFiles.Length);
    r = DirectoryReader.Open(dir);
    foreach (BytesRef id in ids)
    {
        Assert.AreEqual(1, r.DocFreq(new Term("docid", id)));
    }
    r.Dispose();
    cachedDir.Dispose();
    docs.Dispose();
}
private IndexReader CreateIndexReader()
{
    lock (_syncLock)
    {
        // Lazily open a single shared reader the first time it is needed.
        return _indexReader ?? (_indexReader = DirectoryReader.Open(_directory));
    }
}
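The lazy singleton above never refreshes, so it serves a stale point-in-time view once the index changes. A hedged sketch of a refresh path using DirectoryReader.OpenIfChanged, assuming the same _syncLock and _indexReader fields and that the cached reader is a DirectoryReader:

// Hypothetical refresh: OpenIfChanged returns null when the index is
// unchanged, otherwise a new reader that shares unchanged segments.
private void RefreshIndexReader()
{
    lock (_syncLock)
    {
        if (_indexReader is DirectoryReader current)
        {
            DirectoryReader updated = DirectoryReader.OpenIfChanged(current);
            if (updated != null)
            {
                current.Dispose();     // release the stale view
                _indexReader = updated;
            }
        }
    }
}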