public int DocumentCount()
{
    EnsureSearcher();
    var docs = Searcher?.Search(new MatchAllDocsQuery(), 1);
    return docs?.TotalHits ?? 0;
}
public int DocumentCount()
{
    EnsureSearcher();
    var totalHitsCollector = new TotalHitCountCollector();
    Searcher?.Search(new MatchAllDocsQuery(), totalHitsCollector);
    return totalHitsCollector.TotalHits;
}
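Both DocumentCount variants above count every live document; the collector-based form avoids building a TopDocs it never reads. A minimal self-contained sketch of the same idea, assuming Lucene.Net 4.8 and a placeholder index path (nothing here comes from the snippets above):

using Lucene.Net.Index;
using Lucene.Net.Search;
using Lucene.Net.Store;

public static class IndexStats
{
    // Sketch only: count all live documents in an existing index with a TotalHitCountCollector.
    // indexPath is a caller-supplied placeholder, not taken from the original code.
    public static int CountAllDocuments(string indexPath)
    {
        using (Directory dir = FSDirectory.Open(indexPath))
        using (DirectoryReader reader = DirectoryReader.Open(dir))
        {
            var searcher = new IndexSearcher(reader);
            var collector = new TotalHitCountCollector();
            searcher.Search(new MatchAllDocsQuery(), collector);
            return collector.TotalHits;
        }
    }
}

When the whole index is being counted, reader.NumDocs returns the same number without running a query at all.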
String q = args[1]; // B
public static void search(String indexDir, String q)
{
    Directory dir = FSDirectory.Open(new System.IO.FileInfo(indexDir)); // C
    IndexSearcher searcher = new IndexSearcher(dir, true); // D
    QueryParser parser = new QueryParser("contents", new StandardAnalyzer(Version.LUCENE_CURRENT)); // E
    Query query = parser.Parse(q); // E
    Lucene.Net.Search.TopDocs hits = searcher.Search(query, 10); // F
    System.Console.WriteLine("Found " + hits.TotalHits + " document(s) that matched query '" + q + "':");
    for (int i = 0; i < hits.ScoreDocs.Length; i++)
    {
        ScoreDoc scoreDoc = hits.ScoreDocs[i]; // G
        Document doc = searcher.Doc(scoreDoc.Doc); // G
        System.Console.WriteLine(doc.Get("filename")); // G
    }
    searcher.Close(); // H
}
/// <summary>
/// Searches the Lucene index.
/// Returns null if the <see cref="LuceneSearcherConfiguration"/> is not valid.
/// </summary>
/// <param name="searchConfiguration">The configuration describing the search to run.</param>
/// <returns>The matching log entries, or null when the configuration is invalid.</returns>
public List<ILogViewModel> Search(LuceneSearcherConfiguration searchConfiguration)
{
    if (!CheckSearchConfiguration(searchConfiguration))
    {
        return null;
    }
    if (searchConfiguration.ESearchMethod == ESearchMethod.FullText)
    {
        return Search(searchConfiguration, new MultiFieldQueryParser(
            LuceneVersion.LUCENE_48,
            searchConfiguration.Fields,
            new StandardAnalyzer(LuceneVersion.LUCENE_48)).Parse(searchConfiguration.Query));
    }
    return searchConfiguration.WantAll
        ? Search(searchConfiguration, GetAll(searchConfiguration.All))
        : CreateLogsResult(_indexSearcher?.Search(CreateQuery(searchConfiguration), searchConfiguration.MaxResult, _sort));
}
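The FullText branch above feeds a MultiFieldQueryParser with the configured fields. A stripped-down sketch of that parse-then-search step, again assuming Lucene.Net 4.8; the class name, field names, and maxResults parameter are illustrative assumptions, not part of the original class:

using Lucene.Net.Analysis.Standard;
using Lucene.Net.QueryParsers.Classic;
using Lucene.Net.Search;
using Lucene.Net.Util;

public static class FullTextSearchSketch
{
    // Sketch only: parse one query string against several fields and return the top hits.
    // "Message" and "Level" are assumed field names, not taken from the class above.
    public static TopDocs Run(IndexSearcher searcher, string queryText, int maxResults)
    {
        var analyzer = new StandardAnalyzer(LuceneVersion.LUCENE_48);
        var parser = new MultiFieldQueryParser(
            LuceneVersion.LUCENE_48,
            new[] { "Message", "Level" },
            analyzer);
        Query query = parser.Parse(queryText);
        return searcher.Search(query, maxResults);
    }
}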
public void TestLazy() { int id = Random.nextInt(NUM_DOCS); IndexReader reader = DirectoryReader.Open(dir); try { Query q = new TermQuery(new Term("docid", "" + id)); IndexSearcher searcher = NewSearcher(reader); ScoreDoc[] hits = searcher.Search(q, 100).ScoreDocs; assertEquals("Too many docs", 1, hits.Length); LazyTestingStoredFieldVisitor visitor = new LazyTestingStoredFieldVisitor(new LazyDocument(reader, hits[0].Doc), FIELDS); reader.Document(hits[0].Doc, visitor); Document d = visitor.doc; int numFieldValues = 0; IDictionary <string, int> fieldValueCounts = new JCG.Dictionary <string, int>(); // at this point, all FIELDS should be Lazy and unrealized foreach (IIndexableField f in d) { numFieldValues++; if (f.Name.Equals("never_load", StringComparison.Ordinal)) { fail("never_load was loaded"); } if (f.Name.Equals("load_later", StringComparison.Ordinal)) { fail("load_later was loaded on first pass"); } if (f.Name.Equals("docid", StringComparison.Ordinal)) { assertFalse(f.Name, f is LazyDocument.LazyField); } else { if (!fieldValueCounts.TryGetValue(f.Name, out int count)) { count = 0; } count++; fieldValueCounts.Put(f.Name, count); assertTrue(f.Name + " is " + f.GetType(), f is LazyDocument.LazyField); LazyDocument.LazyField lf = (LazyDocument.LazyField)f; assertFalse(f.Name + " is loaded", lf.HasBeenLoaded); } } Console.WriteLine("numFieldValues == " + numFieldValues); assertEquals("numFieldValues", 1 + (NUM_VALUES * FIELDS.Length), numFieldValues); foreach (string field in fieldValueCounts.Keys) { assertEquals("fieldName count: " + field, NUM_VALUES, fieldValueCounts[field]); } // pick a single field name to load a single value string fieldName = FIELDS[Random.nextInt(FIELDS.Length)]; IIndexableField[] fieldValues = d.GetFields(fieldName); assertEquals("#vals in field: " + fieldName, NUM_VALUES, fieldValues.Length); int valNum = Random.nextInt(fieldValues.Length); assertEquals(id + "_" + fieldName + "_" + valNum, fieldValues[valNum].GetStringValue()); // now every value of fieldName should be loaded foreach (IIndexableField f in d) { if (f.Name.Equals("never_load", StringComparison.Ordinal)) { fail("never_load was loaded"); } if (f.Name.Equals("load_later", StringComparison.Ordinal)) { fail("load_later was loaded too soon"); } if (f.Name.Equals("docid", StringComparison.Ordinal)) { assertFalse(f.Name, f is LazyDocument.LazyField); } else { assertTrue(f.Name + " is " + f.GetType(), f is LazyDocument.LazyField); LazyDocument.LazyField lf = (LazyDocument.LazyField)f; assertEquals(f.Name + " is loaded?", lf.Name.Equals(fieldName, StringComparison.Ordinal), lf.HasBeenLoaded); } } // use the same LazyDoc to ask for one more lazy field visitor = new LazyTestingStoredFieldVisitor(new LazyDocument(reader, hits[0].Doc), "load_later"); reader.Document(hits[0].Doc, visitor); d = visitor.doc; // ensure we have all the values we expect now, and that // adding one more lazy field didn't "unload" the existing LazyField's // we already loaded. 
foreach (IIndexableField f in d) { if (f.Name.Equals("never_load", StringComparison.Ordinal)) { fail("never_load was loaded"); } if (f.Name.Equals("docid", StringComparison.Ordinal)) { assertFalse(f.Name, f is LazyDocument.LazyField); } else { assertTrue(f.Name + " is " + f.GetType(), f is LazyDocument.LazyField); LazyDocument.LazyField lf = (LazyDocument.LazyField)f; assertEquals(f.Name + " is loaded?", lf.Name.Equals(fieldName, StringComparison.Ordinal), lf.HasBeenLoaded); } } // even the underlying doc shouldn't have never_load assertNull("never_load was loaded in wrapped doc", visitor.lazyDoc.GetDocument().GetField("never_load")); } finally { reader.Dispose(); } }
// LUCENE-1404
private int HitCount(IndexSearcher searcher, string word)
{
    return searcher.Search(new TermQuery(new Term("text", word)), 10).TotalHits;
}
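HitCount above presumes an already-open IndexSearcher. For context, a hedged end-to-end sketch, assuming Lucene.Net 4.8, that builds a tiny in-memory index and counts hits for one term; the field name and sample text are made up:

using Lucene.Net.Analysis.Standard;
using Lucene.Net.Documents;
using Lucene.Net.Index;
using Lucene.Net.Search;
using Lucene.Net.Store;
using Lucene.Net.Util;

public static class TermCountDemo
{
    public static int CountTerm()
    {
        using (var dir = new RAMDirectory())
        {
            var analyzer = new StandardAnalyzer(LuceneVersion.LUCENE_48);
            var config = new IndexWriterConfig(LuceneVersion.LUCENE_48, analyzer);
            using (var writer = new IndexWriter(dir, config))
            {
                var doc = new Document();
                doc.Add(new TextField("text", "the quick brown fox", Field.Store.NO));
                writer.AddDocument(doc);
                doc = new Document();
                doc.Add(new TextField("text", "the lazy dog", Field.Store.NO));
                writer.AddDocument(doc);
                writer.Commit();
            }
            using (var reader = DirectoryReader.Open(dir))
            {
                var searcher = new IndexSearcher(reader);
                // Only one of the two documents contains "fox", so this returns 1.
                return searcher.Search(new TermQuery(new Term("text", "fox")), 10).TotalHits;
            }
        }
    }
}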
private static List <SearchDoc> lucene_search(Guid applicationId, int lowerBoundary, int count, ref Query query, ref IndexSearcher searcher, bool additionalId, bool title, bool description, bool content, bool tags, bool fileContent) { try { List <SearchDoc> listDocs = new List <SearchDoc>(); TopDocs hits = searcher.Search(query, lowerBoundary + count + (count / 2)); FastVectorHighlighter fvHighlighter = new FastVectorHighlighter(true, true); for (int i = lowerBoundary, lnt = hits.ScoreDocs.Length; i < lnt; ++i) { ScoreDoc sd = hits.ScoreDocs[i]; string addIdFr = !additionalId ? string.Empty : fvHighlighter.GetBestFragment(fvHighlighter.GetFieldQuery(query), searcher.IndexReader, docId: sd.Doc, fieldName: "AdditionalID", fragCharSize: 200); string titleFr = !title ? string.Empty : fvHighlighter.GetBestFragment(fvHighlighter.GetFieldQuery(query), searcher.IndexReader, docId: sd.Doc, fieldName: "Title", fragCharSize: 200); string descFr = !description ? string.Empty : fvHighlighter.GetBestFragment(fvHighlighter.GetFieldQuery(query), searcher.IndexReader, docId: sd.Doc, fieldName: "Description", fragCharSize: 200); string contentFr = !content ? string.Empty : fvHighlighter.GetBestFragment(fvHighlighter.GetFieldQuery(query), searcher.IndexReader, docId: sd.Doc, fieldName: "Content", fragCharSize: 200); string tagsFr = !tags ? string.Empty : fvHighlighter.GetBestFragment(fvHighlighter.GetFieldQuery(query), searcher.IndexReader, docId: sd.Doc, fieldName: "Tags", fragCharSize: 200); string fileFr = !fileContent ? string.Empty : fvHighlighter.GetBestFragment(fvHighlighter.GetFieldQuery(query), searcher.IndexReader, docId: sd.Doc, fieldName: "FileContent", fragCharSize: 200); if (!string.IsNullOrEmpty(titleFr)) { titleFr = titleFr.Trim(); } if (!string.IsNullOrEmpty(addIdFr)) { addIdFr = addIdFr.Trim(); } string highlightedText = ((string.IsNullOrEmpty(descFr) ? string.Empty : descFr + " ") + (string.IsNullOrEmpty(contentFr) ? string.Empty : contentFr + " ") + (string.IsNullOrEmpty(tagsFr) ? string.Empty : tagsFr + " ") + (string.IsNullOrEmpty(fileFr) ? string.Empty : fileFr)).Trim(); if (string.IsNullOrEmpty(addIdFr) && string.IsNullOrEmpty(titleFr) && string.IsNullOrEmpty(highlightedText)) { break; } Document doc = searcher.Doc(sd.Doc); SearchDoc item = SearchDoc.ToSearchDoc(doc); item.Description = highlightedText; listDocs.Add(item); } return(listDocs); } catch (Exception ex) { LogController.save_error_log(applicationId, null, "SearchIndexDocuments", ex, ModuleIdentifier.SRCH); return(new List <SearchDoc>()); } }
public virtual void TestRandomSearchPerformance() { IndexSearcher searcher = new IndexSearcher(Reader); foreach (Term t in SampleTerms) { TermQuery query = new TermQuery(t); TopDocs topDocs = searcher.Search(query, 10); Assert.IsTrue(topDocs.TotalHits > 0); } }
public virtual void TestSimple() { Random random = Random; DocValuesType[] dvTypes = new DocValuesType[] { DocValuesType.NUMERIC, DocValuesType.BINARY, DocValuesType.SORTED, }; Directory dir = NewDirectory(); RandomIndexWriter w = new RandomIndexWriter( random, dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).SetMergePolicy(NewLogMergePolicy())); bool canUseDV = !"Lucene3x".Equals(w.IndexWriter.Config.Codec.Name, StringComparison.Ordinal); DocValuesType dvType = canUseDV ? dvTypes[random.nextInt(dvTypes.Length)] : DocValuesType.NONE; Document doc = new Document(); AddField(doc, groupField, "1", dvType); AddField(doc, countField, "1", dvType); doc.Add(new TextField("content", "random text", Field.Store.NO)); doc.Add(new StringField("id", "1", Field.Store.NO)); w.AddDocument(doc); // 1 doc = new Document(); AddField(doc, groupField, "1", dvType); AddField(doc, countField, "1", dvType); doc.Add(new TextField("content", "some more random text blob", Field.Store.NO)); doc.Add(new StringField("id", "2", Field.Store.NO)); w.AddDocument(doc); // 2 doc = new Document(); AddField(doc, groupField, "1", dvType); AddField(doc, countField, "2", dvType); doc.Add(new TextField("content", "some more random textual data", Field.Store.NO)); doc.Add(new StringField("id", "3", Field.Store.NO)); w.AddDocument(doc); w.Commit(); // To ensure a second segment // 3 doc = new Document(); AddField(doc, groupField, "2", dvType); doc.Add(new TextField("content", "some random text", Field.Store.NO)); doc.Add(new StringField("id", "4", Field.Store.NO)); w.AddDocument(doc); // 4 doc = new Document(); AddField(doc, groupField, "3", dvType); AddField(doc, countField, "1", dvType); doc.Add(new TextField("content", "some more random text", Field.Store.NO)); doc.Add(new StringField("id", "5", Field.Store.NO)); w.AddDocument(doc); // 5 doc = new Document(); AddField(doc, groupField, "3", dvType); AddField(doc, countField, "1", dvType); doc.Add(new TextField("content", "random blob", Field.Store.NO)); doc.Add(new StringField("id", "6", Field.Store.NO)); w.AddDocument(doc); // 6 -- no author field doc = new Document(); doc.Add(new TextField("content", "random word stuck in alot of other text", Field.Store.YES)); AddField(doc, countField, "1", dvType); doc.Add(new StringField("id", "6", Field.Store.NO)); w.AddDocument(doc); IndexSearcher indexSearcher = NewSearcher(w.GetReader()); w.Dispose(); var cmp = Comparer <AbstractDistinctValuesCollector.IGroupCount <IComparable> > .Create((groupCount1, groupCount2) => { if (groupCount1.GroupValue == null) { if (groupCount2.GroupValue == null) { return(0); } return(-1); } else if (groupCount2.GroupValue == null) { return(1); } else { return(groupCount1.GroupValue.CompareTo(groupCount2.GroupValue)); } }); // === Search for content:random IAbstractFirstPassGroupingCollector <IComparable> firstCollector = CreateRandomFirstPassCollector(dvType, new Sort(), groupField, 10); indexSearcher.Search(new TermQuery(new Term("content", "random")), firstCollector); IAbstractDistinctValuesCollector <AbstractDistinctValuesCollector.IGroupCount <IComparable> > distinctValuesCollector = CreateDistinctCountCollector(firstCollector, groupField, countField, dvType); indexSearcher.Search(new TermQuery(new Term("content", "random")), distinctValuesCollector); //var gcs = distinctValuesCollector.Groups as List<IGroupCount<IComparable>>; // LUCENENET TODO: Try to work out how to do this without an O(n) operation var gcs = new List <AbstractDistinctValuesCollector.IGroupCount <IComparable> 
>(distinctValuesCollector.Groups); gcs.Sort(cmp); assertEquals(4, gcs.Count); CompareNull(gcs[0].GroupValue); List <IComparable> countValues = new List <IComparable>(gcs[0].UniqueValues); assertEquals(1, countValues.size()); Compare("1", countValues[0]); Compare("1", gcs[1].GroupValue); countValues = new List <IComparable>(gcs[1].UniqueValues); countValues.Sort(nullComparer); assertEquals(2, countValues.size()); Compare("1", countValues[0]); Compare("2", countValues[1]); Compare("2", gcs[2].GroupValue); countValues = new List <IComparable>(gcs[2].UniqueValues); assertEquals(1, countValues.size()); CompareNull(countValues[0]); Compare("3", gcs[3].GroupValue); countValues = new List <IComparable>(gcs[3].UniqueValues); assertEquals(1, countValues.size()); Compare("1", countValues[0]); // === Search for content:some firstCollector = CreateRandomFirstPassCollector(dvType, new Sort(), groupField, 10); indexSearcher.Search(new TermQuery(new Term("content", "some")), firstCollector); distinctValuesCollector = CreateDistinctCountCollector(firstCollector, groupField, countField, dvType); indexSearcher.Search(new TermQuery(new Term("content", "some")), distinctValuesCollector); // LUCENENET TODO: Try to work out how to do this without an O(n) operation //gcs = distinctValuesCollector.Groups as List<IGroupCount<IComparable>>; gcs = new List <AbstractDistinctValuesCollector.IGroupCount <IComparable> >(distinctValuesCollector.Groups); gcs.Sort(cmp); assertEquals(3, gcs.Count); Compare("1", gcs[0].GroupValue); countValues = new List <IComparable>(gcs[0].UniqueValues); assertEquals(2, countValues.size()); countValues.Sort(nullComparer); Compare("1", countValues[0]); Compare("2", countValues[1]); Compare("2", gcs[1].GroupValue); countValues = new List <IComparable>(gcs[1].UniqueValues); assertEquals(1, countValues.size()); CompareNull(countValues[0]); Compare("3", gcs[2].GroupValue); countValues = new List <IComparable>(gcs[2].UniqueValues); assertEquals(1, countValues.size()); Compare("1", countValues[0]); // === Search for content:blob firstCollector = CreateRandomFirstPassCollector(dvType, new Sort(), groupField, 10); indexSearcher.Search(new TermQuery(new Term("content", "blob")), firstCollector); distinctValuesCollector = CreateDistinctCountCollector(firstCollector, groupField, countField, dvType); indexSearcher.Search(new TermQuery(new Term("content", "blob")), distinctValuesCollector); // LUCENENET TODO: Try to work out how to do this without an O(n) operation //gcs = distinctValuesCollector.Groups as List<IGroupCount<IComparable>>; gcs = new List <AbstractDistinctValuesCollector.IGroupCount <IComparable> >(distinctValuesCollector.Groups); gcs.Sort(cmp); assertEquals(2, gcs.Count); Compare("1", gcs[0].GroupValue); countValues = new List <IComparable>(gcs[0].UniqueValues); // B/c the only one document matched with blob inside the author 1 group assertEquals(1, countValues.Count); Compare("1", countValues[0]); Compare("3", gcs[1].GroupValue); countValues = new List <IComparable>(gcs[1].UniqueValues); assertEquals(1, countValues.Count); Compare("1", countValues[0]); indexSearcher.IndexReader.Dispose(); dir.Dispose(); }
public void TestNumericRangeQuery() { // doesn't currently highlight, but make sure it doesn't cause exception either query = NumericRangeQuery.NewIntRange(NUMERIC_FIELD_NAME, 2, 6, true, true); searcher = new IndexSearcher(ramDir, true); hits = searcher.Search(query, 100); int maxNumFragmentsRequired = 2; QueryScorer scorer = new QueryScorer(query, FIELD_NAME); Highlighter highlighter = new Highlighter(this, scorer); for (int i = 0; i < hits.TotalHits; i++) { String text = searcher.Doc(hits.ScoreDocs[i].Doc).Get(NUMERIC_FIELD_NAME); TokenStream tokenStream = analyzer.TokenStream(FIELD_NAME, new StringReader(text)); highlighter.TextFragmenter = new SimpleFragmenter(40); String result = highlighter.GetBestFragments(tokenStream, text, maxNumFragmentsRequired, "..."); //Console.WriteLine("\t" + result); } }
public void TestRegexQuery() { const int maxNumFragmentsRequired = 2; query = new RegexQuery(new Term(FIELD_NAME, "ken.*")); searcher = new IndexSearcher(ramDir, true); hits = searcher.Search(query, 100); var scorer = new QueryScorer(query, FIELD_NAME); var highlighter = new Highlighter(this, scorer); for (int i = 0; i < hits.TotalHits; i++) { String text = searcher.Doc(hits.ScoreDocs[i].Doc).Get(FIELD_NAME); TokenStream tokenStream = analyzer.TokenStream(FIELD_NAME, new StringReader(text)); highlighter.TextFragmenter = new SimpleFragmenter(40); String result = highlighter.GetBestFragments(tokenStream, text, maxNumFragmentsRequired, "..."); Console.WriteLine("\t" + result); } Assert.IsTrue(numHighlights == 5, "Failed to find correct number of highlights " + numHighlights + " found"); }
public void DoSearching(Query unReWrittenQuery) { searcher = new IndexSearcher(ramDir, true); // for any multi-term queries to work (prefix, wildcard, range,fuzzy etc) // you must use a rewritten query! query = unReWrittenQuery.Rewrite(reader); Console.WriteLine("Searching for: " + query.ToString(FIELD_NAME)); hits = searcher.Search(query, null, 1000); }
private void SearchIndex() { String q = "t_text1:random"; QueryParser parser = new QueryParser(TEST_VERSION, "t_text1", a); Query query = parser.Parse(q); IndexSearcher searcher = new IndexSearcher(dir, true); // This scorer can return negative idf -> null fragment IScorer scorer = new QueryTermScorer(query, searcher.IndexReader, "t_text1"); // This scorer doesn't use idf (patch version) //Scorer scorer = new QueryTermScorer( query, "t_text1" ); Highlighter h = new Highlighter(scorer); TopDocs hits = searcher.Search(query, null, 10); for (int i = 0; i < hits.TotalHits; i++) { Document doc = searcher.Doc(hits.ScoreDocs[i].Doc); String result = h.GetBestFragment(a, "t_text1", doc.Get("t_text1")); Console.WriteLine("result:" + result); Assert.AreEqual(result, "more <B>random</B> words for second field"); } searcher.Close(); }
public void TestUnRewrittenQuery() { var helper = new TestHighlightRunner(); helper.TestAction = () => { numHighlights = 0; // test to show how rewritten query can still be used searcher = new IndexSearcher(ramDir, true); Analyzer analyzer = new StandardAnalyzer(TEST_VERSION); QueryParser parser = new QueryParser(TEST_VERSION, FIELD_NAME, analyzer); Query query = parser.Parse("JF? or Kenned*"); Console.WriteLine("Searching with primitive query"); // forget to set this and... // query=query.Rewrite(reader); TopDocs hits = searcher.Search(query, null, 1000); // create an instance of the highlighter with the tags used to surround // highlighted text // QueryHighlightExtractor highlighter = new // QueryHighlightExtractor(this, // query, new StandardAnalyzer(TEST_VERSION)); int maxNumFragmentsRequired = 3; for (int i = 0; i < hits.TotalHits; i++) { String text = searcher.Doc(hits.ScoreDocs[i].Doc).Get(FIELD_NAME); TokenStream tokenStream = analyzer.TokenStream(FIELD_NAME, new StringReader(text)); Highlighter highlighter = helper.GetHighlighter(query, FIELD_NAME, tokenStream, this, false); highlighter.TextFragmenter = new SimpleFragmenter(40); String highlightedText = highlighter.GetBestFragments(tokenStream, text, maxNumFragmentsRequired, "..."); Console.WriteLine(highlightedText); } // We expect to have zero highlights if the query is multi-terms and is // not // rewritten! Assert.IsTrue(numHighlights == 0, "Failed to find correct number of highlights " + numHighlights + " found"); }; helper.Start(); }
public virtual void TestWrongIndexFieldName() { Store.Directory dir = NewDirectory(); Store.Directory taxoDir = NewDirectory(); // Writes facet ords to a separate directory from the // main index: var taxoWriter = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE); FacetsConfig config = new FacetsConfig(); config.SetIndexFieldName("a", "$facets2"); RandomIndexWriter writer = new RandomIndexWriter( #if FEATURE_INSTANCE_TESTDATA_INITIALIZATION this, #endif Random, dir); Document doc = new Document(); doc.Add(new Int32Field("num", 10, Field.Store.NO)); doc.Add(new FacetField("a", "foo1")); writer.AddDocument(config.Build(taxoWriter, doc)); // NRT open IndexSearcher searcher = NewSearcher(writer.GetReader()); writer.Dispose(); // NRT open var taxoReader = new DirectoryTaxonomyReader(taxoWriter); taxoWriter.Dispose(); FacetsCollector c = new FacetsCollector(); searcher.Search(new MatchAllDocsQuery(), c); TaxonomyFacetSumValueSource facets = new TaxonomyFacetSumValueSource(taxoReader, config, c, new Int32FieldSource("num")); // Ask for top 10 labels for any dims that have counts: IList <FacetResult> results = facets.GetAllDims(10); Assert.IsTrue(results.Count == 0); try { facets.GetSpecificValue("a"); fail("should have hit exc"); } catch (ArgumentException) { // expected } try { facets.GetTopChildren(10, "a"); fail("should have hit exc"); } catch (ArgumentException) { // expected } IOUtils.Dispose(searcher.IndexReader, taxoReader, dir, taxoDir); }
public virtual void TestRandomSampling() { Directory dir = NewDirectory(); Directory taxoDir = NewDirectory(); DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir); RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone); FacetsConfig config = new FacetsConfig(); int numDocs = AtLeast(10000); for (int i = 0; i < numDocs; i++) { Document doc = new Document(); doc.Add(new StringField("EvenOdd", (i % 2 == 0) ? "even" : "odd", Store.NO)); doc.Add(new FacetField("iMod10", Convert.ToString(i % 10))); writer.AddDocument(config.Build(taxoWriter, doc)); } Random random = Random(); // NRT open IndexSearcher searcher = NewSearcher(writer.Reader); var taxoReader = new DirectoryTaxonomyReader(taxoWriter); IOUtils.Dispose(writer, taxoWriter); // Test empty results RandomSamplingFacetsCollector collectRandomZeroResults = new RandomSamplingFacetsCollector(numDocs / 10, random.NextLong()); // There should be no divisions by zero searcher.Search(new TermQuery(new Term("EvenOdd", "NeverMatches")), collectRandomZeroResults); // There should be no divisions by zero and no null result Assert.NotNull(collectRandomZeroResults.GetMatchingDocs()); // There should be no results at all foreach (MatchingDocs doc in collectRandomZeroResults.GetMatchingDocs()) { Assert.AreEqual(0, doc.TotalHits); } // Now start searching and retrieve results. // Use a query to select half of the documents. TermQuery query = new TermQuery(new Term("EvenOdd", "even")); // there will be 5 facet values (0, 2, 4, 6 and 8), as only the even (i % // 10) are hits. // there is a REAL small chance that one of the 5 values will be missed when // sampling. // but is that 0.8 (chance not to take a value) ^ 2000 * 5 (any can be // missing) ~ 10^-193 // so that is probably not going to happen. int maxNumChildren = 5; RandomSamplingFacetsCollector random100Percent = new RandomSamplingFacetsCollector(numDocs, random.NextLong()); // no sampling RandomSamplingFacetsCollector random10Percent = new RandomSamplingFacetsCollector(numDocs / 10, random.NextLong()); // 10 % of total docs, 20% of the hits FacetsCollector fc = new FacetsCollector(); searcher.Search(query, MultiCollector.Wrap(fc, random100Percent, random10Percent)); FastTaxonomyFacetCounts random10FacetCounts = new FastTaxonomyFacetCounts(taxoReader, config, random10Percent); FastTaxonomyFacetCounts random100FacetCounts = new FastTaxonomyFacetCounts(taxoReader, config, random100Percent); FastTaxonomyFacetCounts exactFacetCounts = new FastTaxonomyFacetCounts(taxoReader, config, fc); FacetResult random10Result = random10Percent.AmortizeFacetCounts(random10FacetCounts.GetTopChildren(10, "iMod10"), config, searcher); FacetResult random100Result = random100FacetCounts.GetTopChildren(10, "iMod10"); FacetResult exactResult = exactFacetCounts.GetTopChildren(10, "iMod10"); Assert.AreEqual(random100Result, exactResult); // we should have five children, but there is a small chance we have less. // (see above). Assert.True(random10Result.ChildCount <= maxNumChildren); // there should be one child at least. Assert.True(random10Result.ChildCount >= 1); // now calculate some statistics to determine if the sampled result is 'ok'. // because random sampling is used, the results will vary each time. 
int sum = 0; foreach (LabelAndValue lav in random10Result.LabelValues) { sum += (int)lav.Value; } float mu = (float)sum / (float)maxNumChildren; float variance = 0; foreach (LabelAndValue lav in random10Result.LabelValues) { variance += (float)Math.Pow((mu - (int)lav.Value), 2); } variance = variance / maxNumChildren; float sigma = (float)Math.Sqrt(variance); // we query only half the documents and have 5 categories. The average // number of docs in a category will thus be the total divided by 5*2 float targetMu = numDocs / (5.0f * 2.0f); // the average should be in the range and the standard deviation should not // be too great Assert.True(sigma < 200); Assert.True(targetMu - 3 * sigma < mu && mu < targetMu + 3 * sigma); IOUtils.Dispose(searcher.IndexReader, taxoReader, dir, taxoDir); }
public void TestConstantScoreMultiTermQuery() { numHighlights = 0; query = new WildcardQuery(new Term(FIELD_NAME, "ken*")); ((WildcardQuery) query).RewriteMethod = MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE; searcher = new IndexSearcher(ramDir, true); // can't rewrite ConstantScore if you want to highlight it - // it rewrites to ConstantScoreQuery which cannot be highlighted // query = unReWrittenQuery.Rewrite(reader); Console.WriteLine("Searching for: " + query.ToString(FIELD_NAME)); hits = searcher.Search(query, null, 1000); for (int i = 0; i < hits.TotalHits; i++) { String text = searcher.Doc(hits.ScoreDocs[i].Doc).Get(HighlighterTest.FIELD_NAME); int maxNumFragmentsRequired = 2; String fragmentSeparator = "..."; QueryScorer scorer = null; TokenStream tokenStream = null; tokenStream = analyzer.TokenStream(FIELD_NAME, new StringReader(text)); scorer = new QueryScorer(query, FIELD_NAME); Highlighter highlighter = new Highlighter(this, scorer); highlighter.TextFragmenter = new SimpleFragmenter(20); String result = highlighter.GetBestFragments(tokenStream, text, maxNumFragmentsRequired, fragmentSeparator); Console.WriteLine("\t" + result); } Assert.IsTrue(numHighlights == 5, "Failed to find correct number of highlights " + numHighlights + " found"); // try null field hits = searcher.Search(query, null, 1000); numHighlights = 0; for (int i = 0; i < hits.TotalHits; i++) { String text = searcher.Doc(hits.ScoreDocs[i].Doc).Get(HighlighterTest.FIELD_NAME); int maxNumFragmentsRequired = 2; String fragmentSeparator = "..."; QueryScorer scorer = null; TokenStream tokenStream = null; tokenStream = analyzer.TokenStream(HighlighterTest.FIELD_NAME, new StringReader(text)); scorer = new QueryScorer(query, null); Highlighter highlighter = new Highlighter(this, scorer); highlighter.TextFragmenter = new SimpleFragmenter(20); String result = highlighter.GetBestFragments(tokenStream, text, maxNumFragmentsRequired, fragmentSeparator); Console.WriteLine("\t" + result); } Assert.IsTrue(numHighlights == 5, "Failed to find correct number of highlights " + numHighlights + " found"); // try default field hits = searcher.Search(query, null, 1000); numHighlights = 0; for (int i = 0; i < hits.TotalHits; i++) { String text = searcher.Doc(hits.ScoreDocs[i].Doc).Get(HighlighterTest.FIELD_NAME); int maxNumFragmentsRequired = 2; String fragmentSeparator = "..."; QueryScorer scorer = null; TokenStream tokenStream = null; tokenStream = analyzer.TokenStream(HighlighterTest.FIELD_NAME, new StringReader(text)); scorer = new QueryScorer(query, "random_field", HighlighterTest.FIELD_NAME); Highlighter highlighter = new Highlighter(this, scorer); highlighter.TextFragmenter = new SimpleFragmenter(20); String result = highlighter.GetBestFragments(tokenStream, text, maxNumFragmentsRequired, fragmentSeparator); Console.WriteLine("\t" + result); } Assert.IsTrue(numHighlights == 5, "Failed to find correct number of highlights " + numHighlights + " found"); }
public void TestRandomIndex() { Directory dir = NewDirectory(); MockAnalyzer analyzer = new MockAnalyzer(Random); analyzer.MaxTokenLength = TestUtil.NextInt32(Random, 1, IndexWriter.MAX_TERM_LENGTH); RandomIndexWriter w = new RandomIndexWriter( #if FEATURE_INSTANCE_TESTDATA_INITIALIZATION this, #endif Random, dir, analyzer); CreateRandomIndex(AtLeast(50), w, Random.NextInt64()); DirectoryReader reader = w.GetReader(); AtomicReader wrapper = SlowCompositeReaderWrapper.Wrap(reader); string field = @"body"; Terms terms = wrapper.GetTerms(field); var lowFreqQueue = new AnonymousPriorityQueue(this, 5); Util.PriorityQueue <TermAndFreq> highFreqQueue = new AnonymousPriorityQueue1(this, 5); try { TermsEnum iterator = terms.GetIterator(null); while (iterator.Next() != null) { if (highFreqQueue.Count < 5) { highFreqQueue.Add(new TermAndFreq(BytesRef.DeepCopyOf(iterator.Term), iterator.DocFreq)); lowFreqQueue.Add(new TermAndFreq(BytesRef.DeepCopyOf(iterator.Term), iterator.DocFreq)); } else { if (highFreqQueue.Top.freq < iterator.DocFreq) { highFreqQueue.Top.freq = iterator.DocFreq; highFreqQueue.Top.term = BytesRef.DeepCopyOf(iterator.Term); highFreqQueue.UpdateTop(); } if (lowFreqQueue.Top.freq > iterator.DocFreq) { lowFreqQueue.Top.freq = iterator.DocFreq; lowFreqQueue.Top.term = BytesRef.DeepCopyOf(iterator.Term); lowFreqQueue.UpdateTop(); } } } int lowFreq = lowFreqQueue.Top.freq; int highFreq = highFreqQueue.Top.freq; AssumeTrue(@"unlucky index", highFreq - 1 > lowFreq); List <TermAndFreq> highTerms = QueueToList(highFreqQueue); List <TermAndFreq> lowTerms = QueueToList(lowFreqQueue); IndexSearcher searcher = NewSearcher(reader); Occur lowFreqOccur = RandomOccur(Random); BooleanQuery verifyQuery = new BooleanQuery(); CommonTermsQuery cq = new CommonTermsQuery(RandomOccur(Random), lowFreqOccur, highFreq - 1, Random.NextBoolean()); foreach (TermAndFreq termAndFreq in lowTerms) { cq.Add(new Term(field, termAndFreq.term)); verifyQuery.Add(new BooleanClause(new TermQuery(new Term(field, termAndFreq.term)), lowFreqOccur)); } foreach (TermAndFreq termAndFreq in highTerms) { cq.Add(new Term(field, termAndFreq.term)); } TopDocs cqSearch = searcher.Search(cq, reader.MaxDoc); TopDocs verifySearch = searcher.Search(verifyQuery, reader.MaxDoc); assertEquals(verifySearch.TotalHits, cqSearch.TotalHits); var hits = new JCG.HashSet <int>(); foreach (ScoreDoc doc in verifySearch.ScoreDocs) { hits.Add(doc.Doc); } foreach (ScoreDoc doc in cqSearch.ScoreDocs) { assertTrue(hits.Remove(doc.Doc)); } assertTrue(hits.Count == 0); w.ForceMerge(1); DirectoryReader reader2 = w.GetReader(); QueryUtils.Check( #if FEATURE_INSTANCE_TESTDATA_INITIALIZATION this, #endif Random, cq, NewSearcher(reader2)); reader2.Dispose(); } finally { reader.Dispose(); wrapper.Dispose(); w.Dispose(); dir.Dispose(); } }
public void TestQueryScorerHits() { Analyzer analyzer = new SimpleAnalyzer(); QueryParser qp = new QueryParser(TEST_VERSION, FIELD_NAME, analyzer); query = qp.Parse("\"very long\""); searcher = new IndexSearcher(ramDir, true); TopDocs hits = searcher.Search(query, 10); QueryScorer scorer = new QueryScorer(query, FIELD_NAME); Highlighter highlighter = new Highlighter(scorer); for (int i = 0; i < hits.ScoreDocs.Length; i++) { Document doc = searcher.Doc(hits.ScoreDocs[i].Doc); String storedField = doc.Get(FIELD_NAME); TokenStream stream = TokenSources.GetAnyTokenStream(searcher.IndexReader, hits.ScoreDocs[i].Doc, FIELD_NAME, doc, analyzer); IFragmenter fragmenter = new SimpleSpanFragmenter(scorer); highlighter.TextFragmenter = fragmenter; String fragment = highlighter.GetBestFragment(stream, storedField); Console.WriteLine(fragment); } }
private int RunQuery(IndexSearcher s, Query q) { s.Search(q, 10); int hitCount = s.Search(q, null, 10, new Sort(new SortField("title", SortField.Type_e.STRING))).TotalHits; if (DefaultCodecSupportsDocValues()) { Sort dvSort = new Sort(new SortField("title", SortField.Type_e.STRING)); int hitCount2 = s.Search(q, null, 10, dvSort).TotalHits; Assert.AreEqual(hitCount, hitCount2); } return hitCount; }
/// <summary> /// Searches the index for the querytext and displays a ranked list of results to the screen /// </summary> /// <param name="querytext">The text to search the index</param> private string SearchAndDisplayResults(string querytext, long qid, List <long> relevantList) { System.Console.WriteLine("Searching for " + querytext); querytext = querytext.ToLower(); Query query = parser.Parse(querytext); System.Console.WriteLine($"Searching for { query.ToString()}"); TopDocs results = searcher.Search(query, MAX_QUERY); // create highlighter - using strong tag to highlight in this case (change as needed) //IFormatter formatter = new SimpleHTMLFormatter("<strong>", "</strong>"); IFormatter formatter = new SimpleHTMLFormatter("<span style=\"font-weight:bold;background-color:yellow;\">", "</span>"); // excerpt set to 200 characters in length var fragmenter = new SimpleFragmenter(3000); var scorer = new QueryScorer(query); var highlighter = new Highlighter(formatter, scorer) { TextFragmenter = fragmenter }; long rank = 0; float topscore = 0f; long foundrelevants = 0; List <TrecItem> logItems = new List <TrecItem>(); SearchedListViewModel.DeleteAll(); foreach (ScoreDoc scoreDoc in results.ScoreDocs) { if (rank == 0) { topscore = scoreDoc.Score; } rank++; Lucene.Net.Documents.Document doc = searcher.Doc(scoreDoc.Doc); long id = Convert.ToInt64(doc.Get(PID_FN).ToString()); CollectionPassage ps = collectionProvider.Passages[id]; // Logging Trec logItems.Add(new TrecItem(0, id, rank, scoreDoc.Score)); // get highlighted fragment TokenStream stream = analyzer.TokenStream("", new StringReader(ps.passage_text)); string highlighted = highlighter.GetBestFragment(stream, ps.passage_text); //string url2 = doc.Get(TEXT_FN).ToString(); //Console.WriteLine("Rank " + rank + " text " + myFieldValue); if (highlighted == null) { highlighted = ps.passage_text; } if (relevantList.Contains(id)) { foundrelevants++; } SearchedListViewModel.Add(scoreDoc.Score / topscore, id, ps.GetTitle(), ps.url, highlighted, relevantList.Contains(id)); //Console.WriteLine("==>" + highlighted); } StatusBarViewModel.Instance.NumRelevants = "Num Relevants : " + foundrelevants.ToString() + "/" + relevantList.Count.ToString(); StatusBarViewModel.Instance.NumSearch = "Num Searched :" + results.ScoreDocs.Length.ToString(); // Logging Trec trecLogger.Logging(qid, logItems); //Console.WriteLine(string.Join(",", relevantList)); return(query.ToString()); }
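Several of the highlighter examples above follow the same pattern: wrap the query in a QueryScorer, hand it to a Highlighter with a formatter and a fragmenter, and ask for the best fragment of the stored text. A reduced sketch of just that step, assuming Lucene.Net 4.8's Lucene.Net.Search.Highlight package; the tag strings, fragment size, and method name are illustrative:

using Lucene.Net.Analysis;
using Lucene.Net.Search;
using Lucene.Net.Search.Highlight;

public static class HighlightSketch
{
    // Sketch only: wrap the best-matching fragment of a stored field in <strong> tags.
    // The caller supplies the query, analyzer, field name, and stored text.
    public static string HighlightFragment(Query query, Analyzer analyzer, string fieldName, string storedText)
    {
        var formatter = new SimpleHTMLFormatter("<strong>", "</strong>");
        var scorer = new QueryScorer(query, fieldName);
        var highlighter = new Highlighter(formatter, scorer)
        {
            TextFragmenter = new SimpleFragmenter(100) // fragment size in characters
        };
        // Returns null when the query does not match this field's text.
        return highlighter.GetBestFragment(analyzer, fieldName, storedText);
    }
}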
// Make sure the documents returned by the search match the expected list // Copied from TestSort.java private void AssertMatches(IndexSearcher searcher, Query query, Sort sort, string expectedResult) { ScoreDoc[] result = searcher.Search(query, null, 1000, sort).ScoreDocs; StringBuilder buff = new StringBuilder(10); int n = result.Length; for (int i = 0; i < n; ++i) { Document doc = searcher.Doc(result[i].Doc); IndexableField[] v = doc.GetFields("tracer"); for (int j = 0; j < v.Length; ++j) { buff.Append(v[j].StringValue); } } Assert.AreEqual(expectedResult, buff.ToString()); }
/// <summary> Suggest similar words (restricted or not to a field of a user index)</summary> /// <param name="word">String the word you want a spell check done on /// </param> /// <param name="numSug">int the number of suggest words /// </param> /// <param name="ir">the indexReader of the user index (can be null see field param) /// </param> /// <param name="field">String the field of the user index: if field is not null, the suggested /// words are restricted to the words present in this field. /// </param> /// <param name="morePopular">boolean return only the suggest words that are more frequent than the searched word /// (only if restricted mode = (indexReader!=null and field!=null) /// </param> /// <throws> IOException </throws> /// <returns> String[] the sorted list of the suggest words with this 2 criteria: /// first criteria: the edit distance, second criteria (only if restricted mode): the popularity /// of the suggest words in the field of the user index /// </returns> public virtual System.String[] SuggestSimilar(System.String word, int numSug, IndexReader ir, System.String field, bool morePopular) { // obtainSearcher calls ensureOpen IndexSearcher indexSearcher = ObtainSearcher(); try { float min = this.minScore; int lengthWord = word.Length; int freq = (ir != null && field != null) ? ir.DocFreq(new Term(field, word)) : 0; int goalFreq = (morePopular && ir != null && field != null) ? freq : 0; // if the word exists in the real index and we don't care for word frequency, return the word itself if (!morePopular && freq > 0) { return(new String[] { word }); } var query = new BooleanQuery(); String[] grams; String key; var alreadySeen = new HashSet <string>(); for (var ng = GetMin(lengthWord); ng <= GetMax(lengthWord); ng++) { key = "gram" + ng; // form key grams = FormGrams(word, ng); // form word into ngrams (allow dups too) if (grams.Length == 0) { continue; // hmm } if (bStart > 0) { // should we boost prefixes? 
Add(query, "start" + ng, grams[0], bStart); // matches start of word } if (bEnd > 0) { // should we boost suffixes Add(query, "end" + ng, grams[grams.Length - 1], bEnd); // matches end of word } for (int i = 0; i < grams.Length; i++) { Add(query, key, grams[i]); } } int maxHits = 10 * numSug; // System.out.println("Q: " + query); ScoreDoc[] hits = indexSearcher.Search(query, null, maxHits).ScoreDocs; // System.out.println("HITS: " + hits.length()); SuggestWordQueue sugQueue = new SuggestWordQueue(numSug); // go thru more than 'maxr' matches in case the distance filter triggers int stop = Math.Min(hits.Length, maxHits); SuggestWord sugWord = new SuggestWord(); for (int i = 0; i < stop; i++) { sugWord.termString = indexSearcher.Doc(hits[i].Doc).Get(F_WORD); // get orig word // don't suggest a word for itself, that would be silly if (sugWord.termString.Equals(word)) { continue; } // edit distance sugWord.score = sd.GetDistance(word, sugWord.termString); if (sugWord.score < min) { continue; } if (ir != null && field != null) { // use the user index sugWord.freq = ir.DocFreq(new Term(field, sugWord.termString)); // freq in the index // don't suggest a word that is not present in the field if ((morePopular && goalFreq > sugWord.freq) || sugWord.freq < 1) { continue; } } if (alreadySeen.Add(sugWord.termString) == false) // we already seen this word, no point returning it twice { continue; } sugQueue.InsertWithOverflow(sugWord); if (sugQueue.Size() == numSug) { // if queue full, maintain the minScore score min = ((SuggestWord)sugQueue.Top()).score; } sugWord = new SuggestWord(); } // convert to array string String[] list = new String[sugQueue.Size()]; for (int i = sugQueue.Size() - 1; i >= 0; i--) { list[i] = ((SuggestWord)sugQueue.Pop()).termString; } return(list); } finally { ReleaseSearcher(indexSearcher); } }
public static List <Obj> SearchByLoai(string q, string loai, int from, int size, out int total) { var directory = FSDirectory.Open(new DirectoryInfo(Dic)); var analyzer = new StandardAnalyzer(Version.LUCENE_29); var indexReader = IndexReader.Open(directory, true); var indexSearch = new IndexSearcher(indexReader); var mainQuery = new BooleanQuery(); if (!string.IsNullOrEmpty(q)) { var queryParser = new QueryParser(Version.LUCENE_29, "SearchContent", analyzer); var query = queryParser.Parse(q); mainQuery.Add(query, BooleanClause.Occur.MUST); } var queryParserLoai = new QueryParser(Version.LUCENE_29, "Loai", analyzer); var queryLoai = queryParserLoai.Parse(loai); mainQuery.Add(queryLoai, BooleanClause.Occur.MUST); var resultDocs = indexSearch.Search(mainQuery, indexReader.MaxDoc()); var hits = resultDocs.scoreDocs; total = hits.Length; var list = hits.Select(hit => indexSearch.Doc(hit.doc)).Select(documentFromSearcher => new Obj() { Kieu = documentFromSearcher .Get( "Loai") , RowId = new Guid( documentFromSearcher .Get( "RowId")) , Id = new Guid( documentFromSearcher .Get( "ID")) , Url = documentFromSearcher .Get( "Url") , NoiDung = documentFromSearcher .Get( "NoiDung") , Ten = documentFromSearcher .Get( "Ten") }).Skip( from).Take( size).ToList(); indexSearch.Close(); directory.Close(); return(list); }
public IEnumerable<Article> Search(string searchTerm, string[] fields)
{
    // Check that the index exists; if not, throw
    if (!System.IO.Directory.Exists(indexPath))
    {
        throw new NullReferenceException("Index Does Not Exist");
    }
    // Initialize variables
    luceneIndexDirectory = FSDirectory.Open(indexPath);
    List<Article> CompleteResults = new List<Article>();
    if (searchTerm != "" && fields.Length != 0)
    {
        // Initialize variables
        IndexSearcher searcher = new IndexSearcher(luceneIndexDirectory);
        // Create the text searcher
        MultiFieldQueryParser allFieldsSearcher = new MultiFieldQueryParser(LuceneVersion, fields, analyzer);
        // Parse the search term
        Query query = allFieldsSearcher.Parse(searchTerm);
        // Create a collector that will gather the top results
        TopScoreDocCollector topScoreDocCollector = TopScoreDocCollector.Create(3200, true);
        // Run the search
        searcher.Search(query, topScoreDocCollector);
        // Add the results to a list
        ScoreDoc[] hits = topScoreDocCollector.TopDocs().ScoreDocs;
        List<Article> results = new List<Article>();
        // Walk the result list against the article list so that
        // the full articles are returned to the user.
        foreach (ScoreDoc hit in hits)
        {
            Article art = new Article();
            int docId = hit.Doc;
            float score = hit.Score;
            Document document = searcher.Doc(docId);
            art.Score = Convert.ToDouble(score.ToString("0.0000"));
            art.Id = Convert.ToInt32(document.Get("ID"));
            results.Add(art);
        }
        IEnumerable<Article> Articles = ArticleReader.ReadArticles(@"Data\cacm.all");
        // Add the articles to the results along with each article's score
        foreach (Article item in results)
        {
            foreach (Article article in Articles)
            {
                if (article.Id == item.Id)
                {
                    Article art = new Article();
                    art = article;
                    art.Score = item.Score;
                    CompleteResults.Add(art);
                    break;
                }
            }
        }
        luceneIndexDirectory.Dispose();
        // Return the results to the user
        return CompleteResults.OrderByDescending(x => x.Score);
    }
    else
    {
        return CompleteResults;
    }
}
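The method above shows the collector-based flow: create a TopScoreDocCollector, run the search, then read the collected ScoreDocs. A compact sketch of only that flow, assuming Lucene.Net 4.8, where the hits are read back with GetTopDocs() (the 3.x builds used in the example above expose TopDocs() instead); the 100-hit limit and the "ID" stored field are assumptions:

using System;
using Lucene.Net.Documents;
using Lucene.Net.Search;

public static class CollectorDemo
{
    // Sketch only: collect up to 100 scored hits with a TopScoreDocCollector, then walk them.
    public static void PrintTopHits(IndexSearcher searcher, Query query)
    {
        TopScoreDocCollector collector = TopScoreDocCollector.Create(100, true);
        searcher.Search(query, collector);
        foreach (ScoreDoc hit in collector.GetTopDocs().ScoreDocs)
        {
            Document document = searcher.Doc(hit.Doc);
            Console.WriteLine(document.Get("ID") + " : " + hit.Score.ToString("0.0000"));
        }
    }
}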
public virtual void SearchIndex(Directory dir, string oldName) { //QueryParser parser = new QueryParser("contents", new MockAnalyzer(random)); //Query query = parser.parse("handle:1"); IndexReader reader = DirectoryReader.Open(dir); IndexSearcher searcher = NewSearcher(reader); TestUtil.CheckIndex(dir); // true if this is a 4.0+ index bool is40Index = MultiFields.GetMergedFieldInfos(reader).FieldInfo("content5") != null; // true if this is a 4.2+ index bool is42Index = MultiFields.GetMergedFieldInfos(reader).FieldInfo("dvSortedSet") != null; Debug.Assert(is40Index); // NOTE: currently we can only do this on trunk! IBits liveDocs = MultiFields.GetLiveDocs(reader); for (int i = 0; i < 35; i++) { if (liveDocs.Get(i)) { Document d = reader.Document(i); IList <IIndexableField> fields = d.Fields; bool isProxDoc = d.GetField("content3") == null; if (isProxDoc) { int numFields = is40Index ? 7 : 5; Assert.AreEqual(numFields, fields.Count); IIndexableField f = d.GetField("id"); Assert.AreEqual("" + i, f.GetStringValue()); f = d.GetField("utf8"); Assert.AreEqual("Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", f.GetStringValue()); f = d.GetField("autf8"); Assert.AreEqual("Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", f.GetStringValue()); f = d.GetField("content2"); Assert.AreEqual("here is more content with aaa aaa aaa", f.GetStringValue()); f = d.GetField("fie\u2C77ld"); Assert.AreEqual("field with non-ascii name", f.GetStringValue()); } Fields tfvFields = reader.GetTermVectors(i); Assert.IsNotNull(tfvFields, "i=" + i); Terms tfv = tfvFields.GetTerms("utf8"); Assert.IsNotNull(tfv, "docID=" + i + " index=" + oldName); } else { // Only ID 7 is deleted Assert.AreEqual(7, i); } } if (is40Index) { // check docvalues fields NumericDocValues dvByte = MultiDocValues.GetNumericValues(reader, "dvByte"); BinaryDocValues dvBytesDerefFixed = MultiDocValues.GetBinaryValues(reader, "dvBytesDerefFixed"); BinaryDocValues dvBytesDerefVar = MultiDocValues.GetBinaryValues(reader, "dvBytesDerefVar"); SortedDocValues dvBytesSortedFixed = MultiDocValues.GetSortedValues(reader, "dvBytesSortedFixed"); SortedDocValues dvBytesSortedVar = MultiDocValues.GetSortedValues(reader, "dvBytesSortedVar"); BinaryDocValues dvBytesStraightFixed = MultiDocValues.GetBinaryValues(reader, "dvBytesStraightFixed"); BinaryDocValues dvBytesStraightVar = MultiDocValues.GetBinaryValues(reader, "dvBytesStraightVar"); NumericDocValues dvDouble = MultiDocValues.GetNumericValues(reader, "dvDouble"); NumericDocValues dvFloat = MultiDocValues.GetNumericValues(reader, "dvFloat"); NumericDocValues dvInt = MultiDocValues.GetNumericValues(reader, "dvInt"); NumericDocValues dvLong = MultiDocValues.GetNumericValues(reader, "dvLong"); NumericDocValues dvPacked = MultiDocValues.GetNumericValues(reader, "dvPacked"); NumericDocValues dvShort = MultiDocValues.GetNumericValues(reader, "dvShort"); SortedSetDocValues dvSortedSet = null; if (is42Index) { dvSortedSet = MultiDocValues.GetSortedSetValues(reader, "dvSortedSet"); } for (int i = 0; i < 35; i++) { int id = Convert.ToInt32(reader.Document(i).Get("id")); Assert.AreEqual(id, dvByte.Get(i)); sbyte[] bytes = new sbyte[] { (sbyte)((int)((uint)id >> 24)), (sbyte)((int)((uint)id >> 16)), (sbyte)((int)((uint)id >> 8)), (sbyte)id }; BytesRef expectedRef = new BytesRef((byte[])(Array)bytes); BytesRef scratch = new BytesRef(); dvBytesDerefFixed.Get(i, scratch); Assert.AreEqual(expectedRef, scratch); dvBytesDerefVar.Get(i, scratch); Assert.AreEqual(expectedRef, scratch); 
dvBytesSortedFixed.Get(i, scratch); Assert.AreEqual(expectedRef, scratch); dvBytesSortedVar.Get(i, scratch); Assert.AreEqual(expectedRef, scratch); dvBytesStraightFixed.Get(i, scratch); Assert.AreEqual(expectedRef, scratch); dvBytesStraightVar.Get(i, scratch); Assert.AreEqual(expectedRef, scratch); Assert.AreEqual((double)id, BitConverter.Int64BitsToDouble(dvDouble.Get(i)), 0D); Assert.AreEqual((float)id, Number.Int32BitsToSingle((int)dvFloat.Get(i)), 0F); Assert.AreEqual(id, dvInt.Get(i)); Assert.AreEqual(id, dvLong.Get(i)); Assert.AreEqual(id, dvPacked.Get(i)); Assert.AreEqual(id, dvShort.Get(i)); if (is42Index) { dvSortedSet.SetDocument(i); long ord = dvSortedSet.NextOrd(); Assert.AreEqual(SortedSetDocValues.NO_MORE_ORDS, dvSortedSet.NextOrd()); dvSortedSet.LookupOrd(ord, scratch); Assert.AreEqual(expectedRef, scratch); } } } ScoreDoc[] hits = searcher.Search(new TermQuery(new Term("content", "aaa")), null, 1000).ScoreDocs; // First document should be #0 Document doc = searcher.IndexReader.Document(hits[0].Doc); assertEquals("didn't get the right document first", "0", doc.Get("id")); DoTestHits(hits, 34, searcher.IndexReader); if (is40Index) { hits = searcher.Search(new TermQuery(new Term("content5", "aaa")), null, 1000).ScoreDocs; DoTestHits(hits, 34, searcher.IndexReader); hits = searcher.Search(new TermQuery(new Term("content6", "aaa")), null, 1000).ScoreDocs; DoTestHits(hits, 34, searcher.IndexReader); } hits = searcher.Search(new TermQuery(new Term("utf8", "\u0000")), null, 1000).ScoreDocs; Assert.AreEqual(34, hits.Length); hits = searcher.Search(new TermQuery(new Term("utf8", "lu\uD834\uDD1Ece\uD834\uDD60ne")), null, 1000).ScoreDocs; Assert.AreEqual(34, hits.Length); hits = searcher.Search(new TermQuery(new Term("utf8", "ab\ud917\udc17cd")), null, 1000).ScoreDocs; Assert.AreEqual(34, hits.Length); reader.Dispose(); }
public IEnumerable<Article> AdvancedSearch(string searchTerm, string[] fields)
{
    // Check that the index exists; if not, throw
    if (!System.IO.Directory.Exists(indexPathLead))
    {
        throw new NullReferenceException("Index Does Not Exist");
    }
    // Initialize variables
    luceneLeaderIndexDirectory = FSDirectory.Open(indexPathLead);
    List<Article> results = new List<Article>();
    List<Article> CompleteResults2 = new List<Article>();
    if (searchTerm != "" && fields.Length != 0)
    {
        // Initialize variables
        IndexSearcher searcher = new IndexSearcher(luceneLeaderIndexDirectory);
        // Create the text searcher
        MultiFieldQueryParser allFieldsSearcher = new MultiFieldQueryParser(LuceneVersion, fields, analyzer);
        // Parse the search term
        Query query = allFieldsSearcher.Parse(searchTerm);
        // Create a collector that will fetch the leader
        TopScoreDocCollector topScoreDocCollector = TopScoreDocCollector.Create(1, true);
        // Run the search
        searcher.Search(query, topScoreDocCollector);
        // Add the results to a list
        ScoreDoc[] hits = topScoreDocCollector.TopDocs().ScoreDocs;
        // Isolate the leader
        Article leader = new Article();
        int docId = hits[0].Doc;
        float score = hits[0].Score;
        Document document = searcher.Doc(docId);
        leader.Score = Convert.ToDouble(score.ToString("0.0000"));
        leader.Id = Convert.ToInt32(document.Get("ID"));
        results.Add(leader);
        // Check that the follower index exists; if not, throw
        if (!System.IO.Directory.Exists(indexPathFollower))
        {
            throw new NullReferenceException("Index Does Not Exist");
        }
        // Initialize variables
        luceneIndexDirectoryFollowers = FSDirectory.Open(indexPathFollower);
        // Initialize variables
        IndexSearcher searcherFollowers = new IndexSearcher(luceneIndexDirectoryFollowers);
        // Create the text searcher
        MultiFieldQueryParser allFieldsSearcherFollowers = new MultiFieldQueryParser(LuceneVersion, fields, analyzer);
        //Filter filter =
        //new FieldValueFilter("Leader", new[] { leader.Id.ToString() });
        //new QueryWrapperFilter(new TermQuery(new Term("Leader", leader.Id.ToString())));
        // //QueryWrapperFilter(new WildcardQuery(new Term("Leader", leader.Id.ToString())));
        ////FieldRangeFilter("Leader", leader.Id.ToString(), leader.Id.ToString(), true, true);
        // Parse the search term
        Query queryFollowers = allFieldsSearcherFollowers.Parse(searchTerm);
        // Create a collector that will gather the top results
        TopScoreDocCollector topScoreDocCollectorFollowers = TopScoreDocCollector.Create(3200, true);
        // Run the search
        searcherFollowers.Search(queryFollowers, topScoreDocCollectorFollowers);
        // Add the results to a list
        ScoreDoc[] Followershits = topScoreDocCollectorFollowers.TopDocs().ScoreDocs;
        foreach (ScoreDoc hitFollow in Followershits)
        {
            Article art = new Article();
            int docIdFollower = hitFollow.Doc;
            float scoreFollower = hitFollow.Score;
            Document documentFollower = searcherFollowers.Doc(docIdFollower);
            art.Score = Convert.ToDouble(scoreFollower.ToString("0.0000"));
            art.Id = Convert.ToInt32(documentFollower.Get("ID"));
            int leaderID = Convert.ToInt32(documentFollower.Get("Leader"));
            if (leaderID == leader.Id)
            {
                results.Add(art);
            }
        }
        IEnumerable<Article> Articles = ArticleReader.ReadArticles(@"Data\cacm.all");
        // Add the articles to the results along with each article's score
        foreach (Article res in results)
        {
            foreach (Article article in Articles)
            {
                if (article.Id.ToString() == res.Id.ToString())
                {
                    Article art = new Article();
                    art = article;
                    art.Score = res.Score;
                    CompleteResults2.Add(art);
                    //break;
                }
            }
        }
        // Return the results to the user
        luceneLeaderIndexDirectory.Dispose();
        luceneIndexDirectoryFollowers.Dispose();
        return CompleteResults2;
    }
    else
    {
        return CompleteResults2;
    }
}
public virtual void TestNRTAndCommit() { Directory dir = NewDirectory(); NRTCachingDirectory cachedDir = new NRTCachingDirectory(dir, 2.0, 25.0); MockAnalyzer analyzer = new MockAnalyzer(Random); analyzer.MaxTokenLength = TestUtil.NextInt32(Random, 1, IndexWriter.MAX_TERM_LENGTH); IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer); RandomIndexWriter w = new RandomIndexWriter(Random, cachedDir, conf); LineFileDocs docs = new LineFileDocs(Random, DefaultCodecSupportsDocValues); int numDocs = TestUtil.NextInt32(Random, 100, 400); if (VERBOSE) { Console.WriteLine("TEST: numDocs=" + numDocs); } IList <BytesRef> ids = new List <BytesRef>(); DirectoryReader r = null; for (int docCount = 0; docCount < numDocs; docCount++) { Document doc = docs.NextDoc(); ids.Add(new BytesRef(doc.Get("docid"))); w.AddDocument(doc); if (Random.Next(20) == 17) { if (r == null) { r = DirectoryReader.Open(w.IndexWriter, false); } else { DirectoryReader r2 = DirectoryReader.OpenIfChanged(r); if (r2 != null) { r.Dispose(); r = r2; } } Assert.AreEqual(1 + docCount, r.NumDocs); IndexSearcher s = NewSearcher(r); // Just make sure search can run; we can't assert // totHits since it could be 0 TopDocs hits = s.Search(new TermQuery(new Term("body", "the")), 10); // System.out.println("tot hits " + hits.totalHits); } } if (r != null) { r.Dispose(); } // Close should force cache to clear since all files are sync'd w.Dispose(); string[] cachedFiles = cachedDir.ListCachedFiles(); foreach (string file in cachedFiles) { Console.WriteLine("FAIL: cached file " + file + " remains after sync"); } Assert.AreEqual(0, cachedFiles.Length); r = DirectoryReader.Open(dir); foreach (BytesRef id in ids) { Assert.AreEqual(1, r.DocFreq(new Term("docid", id))); } r.Dispose(); cachedDir.Dispose(); docs.Dispose(); }
public void TestTotalGroupCount() { string groupField = "author"; FieldType customType = new FieldType(); customType.IsStored = true; Directory dir = NewDirectory(); RandomIndexWriter w = new RandomIndexWriter( Random, dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetMergePolicy(NewLogMergePolicy())); bool canUseIDV = !"Lucene3x".Equals(w.IndexWriter.Config.Codec.Name, StringComparison.Ordinal); // 0 Document doc = new Document(); AddGroupField(doc, groupField, "author1", canUseIDV); doc.Add(new TextField("content", "random text", Field.Store.YES)); doc.Add(new Field("id", "1", customType)); w.AddDocument(doc); // 1 doc = new Document(); AddGroupField(doc, groupField, "author1", canUseIDV); doc.Add(new TextField("content", "some more random text blob", Field.Store.YES)); doc.Add(new Field("id", "2", customType)); w.AddDocument(doc); // 2 doc = new Document(); AddGroupField(doc, groupField, "author1", canUseIDV); doc.Add(new TextField("content", "some more random textual data", Field.Store.YES)); doc.Add(new Field("id", "3", customType)); w.AddDocument(doc); w.Commit(); // To ensure a second segment // 3 doc = new Document(); AddGroupField(doc, groupField, "author2", canUseIDV); doc.Add(new TextField("content", "some random text", Field.Store.YES)); doc.Add(new Field("id", "4", customType)); w.AddDocument(doc); // 4 doc = new Document(); AddGroupField(doc, groupField, "author3", canUseIDV); doc.Add(new TextField("content", "some more random text", Field.Store.YES)); doc.Add(new Field("id", "5", customType)); w.AddDocument(doc); // 5 doc = new Document(); AddGroupField(doc, groupField, "author3", canUseIDV); doc.Add(new TextField("content", "random blob", Field.Store.YES)); doc.Add(new Field("id", "6", customType)); w.AddDocument(doc); // 6 -- no author field doc = new Document(); doc.Add(new TextField("content", "random word stuck in alot of other text", Field.Store.YES)); doc.Add(new Field("id", "6", customType)); w.AddDocument(doc); IndexSearcher indexSearcher = NewSearcher(w.GetReader()); w.Dispose(); IAbstractAllGroupsCollector <object> allGroupsCollector = CreateRandomCollector(groupField, canUseIDV); indexSearcher.Search(new TermQuery(new Term("content", "random")), allGroupsCollector); assertEquals(4, allGroupsCollector.GroupCount); allGroupsCollector = CreateRandomCollector(groupField, canUseIDV); indexSearcher.Search(new TermQuery(new Term("content", "some")), allGroupsCollector); assertEquals(3, allGroupsCollector.GroupCount); allGroupsCollector = CreateRandomCollector(groupField, canUseIDV); indexSearcher.Search(new TermQuery(new Term("content", "blob")), allGroupsCollector); assertEquals(2, allGroupsCollector.GroupCount); indexSearcher.IndexReader.Dispose(); dir.Dispose(); }
public virtual void TestSparseFacets() { Store.Directory dir = NewDirectory(); Store.Directory taxoDir = NewDirectory(); // Writes facet ords to a separate directory from the // main index: var taxoWriter = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE); RandomIndexWriter writer = new RandomIndexWriter( #if FEATURE_INSTANCE_TESTDATA_INITIALIZATION this, #endif Random, dir); FacetsConfig config = new FacetsConfig(); Document doc = new Document(); doc.Add(new Int32Field("num", 10, Field.Store.NO)); doc.Add(new FacetField("a", "foo1")); writer.AddDocument(config.Build(taxoWriter, doc)); if (Random.NextBoolean()) { writer.Commit(); } doc = new Document(); doc.Add(new Int32Field("num", 20, Field.Store.NO)); doc.Add(new FacetField("a", "foo2")); doc.Add(new FacetField("b", "bar1")); writer.AddDocument(config.Build(taxoWriter, doc)); if (Random.NextBoolean()) { writer.Commit(); } doc = new Document(); doc.Add(new Int32Field("num", 30, Field.Store.NO)); doc.Add(new FacetField("a", "foo3")); doc.Add(new FacetField("b", "bar2")); doc.Add(new FacetField("c", "baz1")); writer.AddDocument(config.Build(taxoWriter, doc)); // NRT open IndexSearcher searcher = NewSearcher(writer.GetReader()); writer.Dispose(); // NRT open var taxoReader = new DirectoryTaxonomyReader(taxoWriter); taxoWriter.Dispose(); FacetsCollector c = new FacetsCollector(); searcher.Search(new MatchAllDocsQuery(), c); TaxonomyFacetSumValueSource facets = new TaxonomyFacetSumValueSource(taxoReader, new FacetsConfig(), c, new Int32FieldSource("num")); // Ask for top 10 labels for any dims that have counts: IList <FacetResult> results = facets.GetAllDims(10); Assert.AreEqual(3, results.Count); Assert.AreEqual("dim=a path=[] value=60.0 childCount=3\n foo3 (30.0)\n foo2 (20.0)\n foo1 (10.0)\n", results[0].ToString()); Assert.AreEqual("dim=b path=[] value=50.0 childCount=2\n bar2 (30.0)\n bar1 (20.0)\n", results[1].ToString()); Assert.AreEqual("dim=c path=[] value=30.0 childCount=1\n baz1 (30.0)\n", results[2].ToString()); IOUtils.Dispose(searcher.IndexReader, taxoReader, dir, taxoDir); }
public virtual void TestRollingUpdates_Mem() { Random random = new Random(Random.Next()); BaseDirectoryWrapper dir = NewDirectory(); LineFileDocs docs = new LineFileDocs(random, DefaultCodecSupportsDocValues); //provider.register(new MemoryCodec()); if ((!"Lucene3x".Equals(Codec.Default.Name, StringComparison.Ordinal)) && LuceneTestCase.Random.NextBoolean()) { Codec.Default = TestUtil.AlwaysPostingsFormat(new MemoryPostingsFormat(LuceneTestCase.Random.nextBoolean(), random.NextSingle())); } MockAnalyzer analyzer = new MockAnalyzer(LuceneTestCase.Random); analyzer.MaxTokenLength = TestUtil.NextInt32(LuceneTestCase.Random, 1, IndexWriter.MAX_TERM_LENGTH); IndexWriter w = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer)); int SIZE = AtLeast(20); int id = 0; IndexReader r = null; IndexSearcher s = null; int numUpdates = (int)(SIZE * (2 + (TEST_NIGHTLY ? 200 * LuceneTestCase.Random.NextDouble() : 5 * LuceneTestCase.Random.NextDouble()))); if (VERBOSE) { Console.WriteLine("TEST: numUpdates=" + numUpdates); } int updateCount = 0; // TODO: sometimes update ids not in order... for (int docIter = 0; docIter < numUpdates; docIter++) { Documents.Document doc = docs.NextDoc(); string myID = "" + id; if (id == SIZE - 1) { id = 0; } else { id++; } if (VERBOSE) { Console.WriteLine(" docIter=" + docIter + " id=" + id); } ((Field)doc.GetField("docid")).SetStringValue(myID); Term idTerm = new Term("docid", myID); bool doUpdate; if (s != null && updateCount < SIZE) { TopDocs hits = s.Search(new TermQuery(idTerm), 1); Assert.AreEqual(1, hits.TotalHits); doUpdate = !w.TryDeleteDocument(r, hits.ScoreDocs[0].Doc); if (VERBOSE) { if (doUpdate) { Console.WriteLine(" tryDeleteDocument failed"); } else { Console.WriteLine(" tryDeleteDocument succeeded"); } } } else { doUpdate = true; if (VERBOSE) { Console.WriteLine(" no searcher: doUpdate=true"); } } updateCount++; if (doUpdate) { w.UpdateDocument(idTerm, doc); } else { w.AddDocument(doc); } if (docIter >= SIZE && LuceneTestCase.Random.Next(50) == 17) { if (r != null) { r.Dispose(); } bool applyDeletions = LuceneTestCase.Random.NextBoolean(); if (VERBOSE) { Console.WriteLine("TEST: reopen applyDeletions=" + applyDeletions); } r = w.GetReader(applyDeletions); if (applyDeletions) { s = NewSearcher(r); } else { s = null; } Assert.IsTrue(!applyDeletions || r.NumDocs == SIZE, "applyDeletions=" + applyDeletions + " r.NumDocs=" + r.NumDocs + " vs SIZE=" + SIZE); updateCount = 0; } } if (r != null) { r.Dispose(); } w.Commit(); Assert.AreEqual(SIZE, w.NumDocs); w.Dispose(); TestIndexWriter.AssertNoUnreferencedFiles(dir, "leftover files after rolling updates"); docs.Dispose(); // LUCENE-4455: SegmentInfos infos = new SegmentInfos(); infos.Read(dir); long totalBytes = 0; foreach (SegmentCommitInfo sipc in infos.Segments) { totalBytes += sipc.GetSizeInBytes(); } long totalBytes2 = 0; foreach (string fileName in dir.ListAll()) { if (!fileName.StartsWith(IndexFileNames.SEGMENTS, StringComparison.Ordinal)) { totalBytes2 += dir.FileLength(fileName); } } Assert.AreEqual(totalBytes2, totalBytes); dir.Dispose(); }
public virtual void TestBasic() { Store.Directory dir = NewDirectory(); Store.Directory taxoDir = NewDirectory(); // Writes facet ords to a separate directory from the // main index: DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE); RandomIndexWriter writer = new RandomIndexWriter( #if FEATURE_INSTANCE_TESTDATA_INITIALIZATION this, #endif Random, dir); FacetsConfig config = new FacetsConfig(); // Reused across documents, to add the necessary facet // fields: Document doc = new Document(); doc.Add(new Int32Field("num", 10, Field.Store.NO)); doc.Add(new FacetField("Author", "Bob")); writer.AddDocument(config.Build(taxoWriter, doc)); doc = new Document(); doc.Add(new Int32Field("num", 20, Field.Store.NO)); doc.Add(new FacetField("Author", "Lisa")); writer.AddDocument(config.Build(taxoWriter, doc)); doc = new Document(); doc.Add(new Int32Field("num", 30, Field.Store.NO)); doc.Add(new FacetField("Author", "Lisa")); writer.AddDocument(config.Build(taxoWriter, doc)); doc = new Document(); doc.Add(new Int32Field("num", 40, Field.Store.NO)); doc.Add(new FacetField("Author", "Susan")); writer.AddDocument(config.Build(taxoWriter, doc)); doc = new Document(); doc.Add(new Int32Field("num", 45, Field.Store.NO)); doc.Add(new FacetField("Author", "Frank")); writer.AddDocument(config.Build(taxoWriter, doc)); // NRT open IndexSearcher searcher = NewSearcher(writer.GetReader()); writer.Dispose(); // NRT open var taxoReader = new DirectoryTaxonomyReader(taxoWriter); taxoWriter.Dispose(); // Aggregate the facet counts: FacetsCollector c = new FacetsCollector(); // MatchAllDocsQuery is for "browsing" (counts facets // for all non-deleted docs in the index); normally // you'd use a "normal" query and one of the // Facets.search utility methods: searcher.Search(new MatchAllDocsQuery(), c); TaxonomyFacetSumValueSource facets = new TaxonomyFacetSumValueSource(taxoReader, new FacetsConfig(), c, new Int32FieldSource("num")); // Retrieve & verify results: Assert.AreEqual("dim=Author path=[] value=145.0 childCount=4\n Lisa (50.0)\n Frank (45.0)\n Susan (40.0)\n Bob (10.0)\n", facets.GetTopChildren(10, "Author").ToString()); taxoReader.Dispose(); searcher.IndexReader.Dispose(); dir.Dispose(); taxoDir.Dispose(); }
/// <summary> /// Split a given index into 3 indexes for training, test and cross validation tasks respectively /// </summary> /// <param name="originalIndex">an <see cref="AtomicReader"/> on the source index</param> /// <param name="trainingIndex">a <see cref="Directory"/> used to write the training index</param> /// <param name="testIndex">a <see cref="Directory"/> used to write the test index</param> /// <param name="crossValidationIndex">a <see cref="Directory"/> used to write the cross validation index</param> /// <param name="analyzer"><see cref="Analyzer"/> used to create the new docs</param> /// <param name="fieldNames">names of fields that need to be put in the new indexes or <c>null</c> if all should be used</param> /// <exception cref="IOException">if any writing operation fails on any of the indexes</exception> public virtual void Split(AtomicReader originalIndex, Directory trainingIndex, Directory testIndex, Directory crossValidationIndex, Analyzer analyzer, params string[] fieldNames) { #pragma warning disable 612, 618 // create IWs for train / test / cv IDXs IndexWriter testWriter = new IndexWriter(testIndex, new IndexWriterConfig(LuceneVersion.LUCENE_CURRENT, analyzer)); IndexWriter cvWriter = new IndexWriter(crossValidationIndex, new IndexWriterConfig(LuceneVersion.LUCENE_CURRENT, analyzer)); IndexWriter trainingWriter = new IndexWriter(trainingIndex, new IndexWriterConfig(LuceneVersion.LUCENE_CURRENT, analyzer)); #pragma warning restore 612, 618 try { int size = originalIndex.MaxDoc; IndexSearcher indexSearcher = new IndexSearcher(originalIndex); TopDocs topDocs = indexSearcher.Search(new MatchAllDocsQuery(), int.MaxValue); // set the type to be indexed, stored, with term vectors FieldType ft = new FieldType(TextField.TYPE_STORED); ft.StoreTermVectors = true; ft.StoreTermVectorOffsets = true; ft.StoreTermVectorPositions = true; int b = 0; // iterate over existing documents foreach (ScoreDoc scoreDoc in topDocs.ScoreDocs) { // create a new document for indexing Document doc = new Document(); if (fieldNames != null && fieldNames.Length > 0) { foreach (string fieldName in fieldNames) { doc.Add(new Field(fieldName, originalIndex.Document(scoreDoc.Doc).GetField(fieldName).ToString(), ft)); } } else { foreach (IIndexableField storableField in originalIndex.Document(scoreDoc.Doc).Fields) { if (storableField.GetReaderValue() != null) { doc.Add(new Field(storableField.Name, storableField.GetReaderValue(), ft)); } else if (storableField.GetBinaryValue() != null) { doc.Add(new Field(storableField.Name, storableField.GetBinaryValue(), ft)); } else if (storableField.GetStringValue() != null) { doc.Add(new Field(storableField.Name, storableField.GetStringValue(), ft)); } else if (storableField.NumericType != NumericFieldType.NONE) // LUCENENET specific - checking the NumericType property is quicker than the type conversion { // LUCENENET specific - need to pass invariant culture here (we are assuming the Field will be stored) // and we need to round-trip floating point numbers so we don't lose precision. 
if (storableField.NumericType == NumericFieldType.SINGLE || storableField.NumericType == NumericFieldType.DOUBLE) { // LUCENENET: Need to specify the "R" for round-trip: http://stackoverflow.com/a/611564 doc.Add(new Field(storableField.Name, storableField.GetStringValue("R", CultureInfo.InvariantCulture), ft)); } else { doc.Add(new Field(storableField.Name, storableField.GetStringValue(CultureInfo.InvariantCulture), ft)); } } } } // add it to one of the IDXs if (b % 2 == 0 && testWriter.MaxDoc < size * _testRatio) { testWriter.AddDocument(doc); } else if (cvWriter.MaxDoc < size * _crossValidationRatio) { cvWriter.AddDocument(doc); } else { trainingWriter.AddDocument(doc); } b++; } } catch (Exception e) { throw new IOException("Exception in DatasetSplitter", e); } finally { testWriter.Commit(); cvWriter.Commit(); trainingWriter.Commit(); // close IWs testWriter.Dispose(); cvWriter.Dispose(); trainingWriter.Dispose(); } }
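// A hedged usage sketch for the Split method above (not part of the original source).
// Assumptions: the surrounding DatasetSplitter class has a (testRatio, crossValidationRatio)
// constructor matching the _testRatio/_crossValidationRatio fields used in Split; the directory
// paths, analyzer choice, and "content" field name are illustrative only.
using (DirectoryReader original = DirectoryReader.Open(FSDirectory.Open("source-index")))
using (Lucene.Net.Store.Directory train = FSDirectory.Open("train-index"))
using (Lucene.Net.Store.Directory test = FSDirectory.Open("test-index"))
using (Lucene.Net.Store.Directory cv = FSDirectory.Open("cv-index"))
{
    // Split requires an AtomicReader, so wrap the composite reader.
    AtomicReader atomicReader = SlowCompositeReaderWrapper.Wrap(original);
    var splitter = new DatasetSplitter(0.2, 0.1); // assumed (testRatio, crossValidationRatio) ctor
    splitter.Split(atomicReader, train, test, cv,
        new StandardAnalyzer(LuceneVersion.LUCENE_48),
        "content"); // copy only the "content" field into the three output indexes
}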
public virtual void TestRandom() { Random random = Random; int numberOfRuns = TestUtil.NextInt32(random, 3, 6); for (int indexIter = 0; indexIter < numberOfRuns; indexIter++) { IndexContext context = CreateIndexContext(); for (int searchIter = 0; searchIter < 100; searchIter++) { IndexSearcher searcher = NewSearcher(context.indexReader); bool useDv = context.dvType != DocValuesType.NONE && random.nextBoolean(); DocValuesType dvType = useDv ? context.dvType : DocValuesType.NONE; string term = context.contentStrings[random.nextInt(context.contentStrings.Length)]; Sort groupSort = new Sort(new SortField("id", SortFieldType.STRING)); int topN = 1 + random.nextInt(10); List <AbstractDistinctValuesCollector.IGroupCount <IComparable> > expectedResult = CreateExpectedResult(context, term, groupSort, topN); IAbstractFirstPassGroupingCollector <IComparable> firstCollector = CreateRandomFirstPassCollector(dvType, groupSort, groupField, topN); searcher.Search(new TermQuery(new Term("content", term)), firstCollector); IAbstractDistinctValuesCollector <AbstractDistinctValuesCollector.IGroupCount <IComparable> > distinctValuesCollector = CreateDistinctCountCollector(firstCollector, groupField, countField, dvType); searcher.Search(new TermQuery(new Term("content", term)), distinctValuesCollector); // LUCENENET TODO: Try to work out how to do this without an O(n) operation List <AbstractDistinctValuesCollector.IGroupCount <IComparable> > actualResult = new List <AbstractDistinctValuesCollector.IGroupCount <IComparable> >(distinctValuesCollector.Groups); if (Verbose) { Console.WriteLine("Index iter=" + indexIter); Console.WriteLine("Search iter=" + searchIter); Console.WriteLine("1st pass collector class name=" + firstCollector.GetType().Name); Console.WriteLine("2nd pass collector class name=" + distinctValuesCollector.GetType().Name); Console.WriteLine("Search term=" + term); Console.WriteLine("DVType=" + dvType); Console.WriteLine("1st pass groups=" + firstCollector.GetTopGroups(0, false).toString()); Console.WriteLine("Expected:"); PrintGroups(expectedResult); Console.WriteLine("Actual:"); PrintGroups(actualResult); Console.Out.Flush(); } assertEquals(expectedResult.Count, actualResult.Count); for (int i = 0; i < expectedResult.size(); i++) { AbstractDistinctValuesCollector.IGroupCount <IComparable> expected = expectedResult[i]; AbstractDistinctValuesCollector.IGroupCount <IComparable> actual = actualResult[i]; AssertValues(expected.GroupValue, actual.GroupValue); assertEquals(expected.UniqueValues.Count(), actual.UniqueValues.Count()); List <IComparable> expectedUniqueValues = new List <IComparable>(expected.UniqueValues); expectedUniqueValues.Sort(nullComparer); List <IComparable> actualUniqueValues = new List <IComparable>(actual.UniqueValues); actualUniqueValues.Sort(nullComparer); for (int j = 0; j < expectedUniqueValues.size(); j++) { AssertValues(expectedUniqueValues[j], actualUniqueValues[j]); } } } context.indexReader.Dispose(); context.directory.Dispose(); } }
public static Hits Search(Query q, int revFirst, int revLast) { return(Searcher.Search(q, new RevisionFilter(revFirst, revLast))); }
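// The overload above returns the long-deprecated Hits type. A minimal sketch (not from the
// original source) of the same revision-range lookup against the TopDocs-based API, assuming the
// same static Searcher and the custom RevisionFilter; the maxResults parameter is illustrative.
public static TopDocs Search(Query q, int revFirst, int revLast, int maxResults)
{
    // Search(Query, Filter, int) returns the top maxResults hits instead of a Hits enumerator.
    return Searcher.Search(q, new RevisionFilter(revFirst, revLast), maxResults);
}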
public void TestBasics() { Directory dir = NewDirectory(); MockAnalyzer analyzer = new MockAnalyzer(Random); RandomIndexWriter w = new RandomIndexWriter( #if FEATURE_INSTANCE_TESTDATA_INITIALIZATION this, #endif Random, dir, analyzer); var docs = new string[] { @"this is the end of the world right", @"is this it or maybe not", @"this is the end of the universe as we know it", @"there is the famous restaurant at the end of the universe" }; for (int i = 0; i < docs.Length; i++) { Document doc = new Document(); doc.Add(NewStringField(@"id", @"" + i, Field.Store.YES)); doc.Add(NewTextField(@"field", docs[i], Field.Store.NO)); w.AddDocument(doc); } IndexReader r = w.GetReader(); IndexSearcher s = NewSearcher(r); { CommonTermsQuery query = new CommonTermsQuery(Occur.SHOULD, Occur.SHOULD, Random.NextBoolean() ? 2F : 0.5F); query.Add(new Term("field", "is")); query.Add(new Term("field", "this")); query.Add(new Term("field", "end")); query.Add(new Term("field", "world")); query.Add(new Term("field", "universe")); query.Add(new Term("field", "right")); TopDocs search = s.Search(query, 10); assertEquals(search.TotalHits, 3); assertEquals(@"0", r.Document(search.ScoreDocs[0].Doc).Get(@"id")); assertEquals(@"2", r.Document(search.ScoreDocs[1].Doc).Get(@"id")); assertEquals(@"3", r.Document(search.ScoreDocs[2].Doc).Get(@"id")); } { CommonTermsQuery query = new CommonTermsQuery(Occur.SHOULD, Occur.SHOULD, Random.NextBoolean() ? 2F : 0.5F); query.Add(new Term("field", "is")); query.Add(new Term("field", "this")); query.Add(new Term("field", "end")); TopDocs search = s.Search(query, 10); assertEquals(search.TotalHits, 2); assertEquals(@"0", r.Document(search.ScoreDocs[0].Doc).Get(@"id")); assertEquals(@"2", r.Document(search.ScoreDocs[1].Doc).Get(@"id")); } { CommonTermsQuery query = new CommonTermsQuery(Occur.SHOULD, Occur.MUST, Random.NextBoolean() ? 2F : 0.5F); query.Add(new Term("field", "is")); query.Add(new Term("field", "this")); query.Add(new Term("field", "end")); query.Add(new Term("field", "world")); TopDocs search = s.Search(query, 10); assertEquals(search.TotalHits, 1); assertEquals(@"0", r.Document(search.ScoreDocs[0].Doc).Get(@"id")); } { CommonTermsQuery query = new CommonTermsQuery(Occur.SHOULD, Occur.MUST, Random.NextBoolean() ? 2F : 0.5F); query.Add(new Term("field", "restaurant")); query.Add(new Term("field", "universe")); TopDocs search = s.Search(query, 10); assertEquals(search.TotalHits, 1); assertEquals(@"3", r.Document(search.ScoreDocs[0].Doc).Get(@"id")); } r.Dispose(); w.Dispose(); dir.Dispose(); }
public virtual void TestWrongIndexFieldName() { Store.Directory dir = NewDirectory(); Store.Directory taxoDir = NewDirectory(); // Writes facet ords to a separate directory from the // main index: DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE); FacetsConfig config = new FacetsConfig(); config.SetIndexFieldName("a", "$facets2"); RandomIndexWriter writer = new RandomIndexWriter( #if FEATURE_INSTANCE_TESTDATA_INITIALIZATION this, #endif Random, dir); Document doc = new Document(); doc.Add(new FacetField("a", "foo1")); writer.AddDocument(config.Build(taxoWriter, doc)); // NRT open IndexSearcher searcher = NewSearcher(writer.GetReader()); // NRT open var taxoReader = new DirectoryTaxonomyReader(taxoWriter); FacetsCollector c = new FacetsCollector(); searcher.Search(new MatchAllDocsQuery(), c); // Uses default $facets field: Facets facets; if (Random.NextBoolean()) { facets = new FastTaxonomyFacetCounts(taxoReader, config, c); } else { OrdinalsReader ordsReader = new DocValuesOrdinalsReader(); if (Random.NextBoolean()) { ordsReader = new CachedOrdinalsReader(ordsReader); } facets = new TaxonomyFacetCounts(ordsReader, taxoReader, config, c); } // Ask for top 10 labels for any dims that have counts: IList <FacetResult> results = facets.GetAllDims(10); Assert.True(results.Count == 0); try { facets.GetSpecificValue("a"); fail("should have hit exc"); } catch (System.ArgumentException) { // expected } try { facets.GetTopChildren(10, "a"); fail("should have hit exc"); } catch (System.ArgumentException) { // expected } IOUtils.Dispose(writer, taxoWriter, searcher.IndexReader, taxoReader, taxoDir, dir); }
public virtual void TestTransitionAPI() { Directory dir = NewDirectory(); RandomIndexWriter w = new RandomIndexWriter(Random(), dir, Similarity, TimeZone); Documents.Document doc = new Documents.Document(); doc.Add(new Field("stored", "abc", Field.Store.YES, Field.Index.NO)); doc.Add(new Field("stored_indexed", "abc xyz", Field.Store.YES, Field.Index.NOT_ANALYZED)); doc.Add(new Field("stored_tokenized", "abc xyz", Field.Store.YES, Field.Index.ANALYZED)); doc.Add(new Field("indexed", "abc xyz", Field.Store.NO, Field.Index.NOT_ANALYZED)); doc.Add(new Field("tokenized", "abc xyz", Field.Store.NO, Field.Index.ANALYZED)); doc.Add(new Field("tokenized_reader", new StringReader("abc xyz"))); doc.Add(new Field("tokenized_tokenstream", w.w.Analyzer.TokenStream("tokenized_tokenstream", new StringReader("abc xyz")))); doc.Add(new Field("binary", new byte[10])); doc.Add(new Field("tv", "abc xyz", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES)); doc.Add(new Field("tv_pos", "abc xyz", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS)); doc.Add(new Field("tv_off", "abc xyz", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_OFFSETS)); doc.Add(new Field("tv_pos_off", "abc xyz", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); w.AddDocument(doc); IndexReader r = w.Reader; w.Dispose(); doc = r.Document(0); // 4 stored fields Assert.AreEqual(4, doc.Fields.Count); Assert.AreEqual("abc", doc.Get("stored")); Assert.AreEqual("abc xyz", doc.Get("stored_indexed")); Assert.AreEqual("abc xyz", doc.Get("stored_tokenized")); BytesRef br = doc.GetBinaryValue("binary"); Assert.IsNotNull(br); Assert.AreEqual(10, br.Length); IndexSearcher s = new IndexSearcher(r); Assert.AreEqual(1, s.Search(new TermQuery(new Term("stored_indexed", "abc xyz")), 1).TotalHits); Assert.AreEqual(1, s.Search(new TermQuery(new Term("stored_tokenized", "abc")), 1).TotalHits); Assert.AreEqual(1, s.Search(new TermQuery(new Term("stored_tokenized", "xyz")), 1).TotalHits); Assert.AreEqual(1, s.Search(new TermQuery(new Term("indexed", "abc xyz")), 1).TotalHits); Assert.AreEqual(1, s.Search(new TermQuery(new Term("tokenized", "abc")), 1).TotalHits); Assert.AreEqual(1, s.Search(new TermQuery(new Term("tokenized", "xyz")), 1).TotalHits); Assert.AreEqual(1, s.Search(new TermQuery(new Term("tokenized_reader", "abc")), 1).TotalHits); Assert.AreEqual(1, s.Search(new TermQuery(new Term("tokenized_reader", "xyz")), 1).TotalHits); Assert.AreEqual(1, s.Search(new TermQuery(new Term("tokenized_tokenstream", "abc")), 1).TotalHits); Assert.AreEqual(1, s.Search(new TermQuery(new Term("tokenized_tokenstream", "xyz")), 1).TotalHits); foreach (string field in new string[] { "tv", "tv_pos", "tv_off", "tv_pos_off" }) { Fields tvFields = r.GetTermVectors(0); Terms tvs = tvFields.Terms(field); Assert.IsNotNull(tvs); Assert.AreEqual(2, tvs.Size()); TermsEnum tvsEnum = tvs.Iterator(null); Assert.AreEqual(new BytesRef("abc"), tvsEnum.Next()); DocsAndPositionsEnum dpEnum = tvsEnum.DocsAndPositions(null, null); if (field.Equals("tv")) { Assert.IsNull(dpEnum); } else { Assert.IsNotNull(dpEnum); } Assert.AreEqual(new BytesRef("xyz"), tvsEnum.Next()); Assert.IsNull(tvsEnum.Next()); } r.Dispose(); dir.Dispose(); }
public virtual void TestBasic() { Store.Directory dir = NewDirectory(); Store.Directory taxoDir = NewDirectory(); // Writes facet ords to a separate directory from the // main index: var taxoWriter = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE); FacetsConfig config = new FacetsConfig(); config.SetHierarchical("Publish Date", true); RandomIndexWriter writer = new RandomIndexWriter( #if FEATURE_INSTANCE_TESTDATA_INITIALIZATION this, #endif Random, dir); Document doc = new Document(); doc.Add(new FacetField("Author", "Bob")); doc.Add(new FacetField("Publish Date", "2010", "10", "15")); writer.AddDocument(config.Build(taxoWriter, doc)); doc = new Document(); doc.Add(new FacetField("Author", "Lisa")); doc.Add(new FacetField("Publish Date", "2010", "10", "20")); writer.AddDocument(config.Build(taxoWriter, doc)); doc = new Document(); doc.Add(new FacetField("Author", "Lisa")); doc.Add(new FacetField("Publish Date", "2012", "1", "1")); writer.AddDocument(config.Build(taxoWriter, doc)); doc = new Document(); doc.Add(new FacetField("Author", "Susan")); doc.Add(new FacetField("Publish Date", "2012", "1", "7")); writer.AddDocument(config.Build(taxoWriter, doc)); doc = new Document(); doc.Add(new FacetField("Author", "Frank")); doc.Add(new FacetField("Publish Date", "1999", "5", "5")); writer.AddDocument(config.Build(taxoWriter, doc)); // NRT open IndexSearcher searcher = NewSearcher(writer.GetReader()); // NRT open var taxoReader = new DirectoryTaxonomyReader(taxoWriter); // Aggregate the facet counts: FacetsCollector c = new FacetsCollector(); // MatchAllDocsQuery is for "browsing" (counts facets // for all non-deleted docs in the index); normally // you'd use a "normal" query, and use MultiCollector to // wrap collecting the "normal" hits and also facets: searcher.Search(new MatchAllDocsQuery(), c); Facets facets = new FastTaxonomyFacetCounts(taxoReader, config, c); // Retrieve & verify results: Assert.AreEqual("dim=Publish Date path=[] value=5 childCount=3\n 2010 (2)\n 2012 (2)\n 1999 (1)\n", facets.GetTopChildren(10, "Publish Date").ToString()); Assert.AreEqual("dim=Author path=[] value=5 childCount=4\n Lisa (2)\n Bob (1)\n Susan (1)\n Frank (1)\n", facets.GetTopChildren(10, "Author").ToString()); // Now user drills down on Publish Date/2010: DrillDownQuery q2 = new DrillDownQuery(config); q2.Add("Publish Date", "2010"); c = new FacetsCollector(); searcher.Search(q2, c); facets = new FastTaxonomyFacetCounts(taxoReader, config, c); Assert.AreEqual("dim=Author path=[] value=2 childCount=2\n Bob (1)\n Lisa (1)\n", facets.GetTopChildren(10, "Author").ToString()); Assert.AreEqual(1, facets.GetSpecificValue("Author", "Lisa")); Assert.Null(facets.GetTopChildren(10, "Non exitent dim")); // Smoke test PrintTaxonomyStats: string result; using (ByteArrayOutputStream bos = new ByteArrayOutputStream()) { using (StreamWriter w = new StreamWriter(bos, Encoding.UTF8, 2048, true) { AutoFlush = true }) { PrintTaxonomyStats.PrintStats(taxoReader, w, true); } result = bos.ToString(); } Assert.True(result.IndexOf("/Author: 4 immediate children; 5 total categories", StringComparison.Ordinal) != -1); Assert.True(result.IndexOf("/Publish Date: 3 immediate children; 12 total categories", StringComparison.Ordinal) != -1); // Make sure at least a few nodes of the tree came out: Assert.True(result.IndexOf(" /1999", StringComparison.Ordinal) != -1); Assert.True(result.IndexOf(" /2012", StringComparison.Ordinal) != -1); Assert.True(result.IndexOf(" /20", StringComparison.Ordinal) != -1); IOUtils.Dispose(writer, 
taxoWriter, searcher.IndexReader, taxoReader, taxoDir, dir); }
private static void GenerateHighlights(IList<Document> documents, IndexWriter writer, SearchCriteria criteria) { var documentHighlightMap = documents.ToDictionary(c => c._id.ToString()); var reader = DirectoryReader.Open(writer, true, true); var queryParser = new HighlighterQueryParser(writer.GetAnalyzer()); queryParser.SetMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_REWRITE); var query = queryParser.Parse(criteria.Query) .Rewrite(reader); var highlighter = CreateHighlighter(); var fieldQuery = highlighter.GetFieldQuery(query); var searcher = new IndexSearcher(reader); var topFieldDocs = searcher.Search(query, documents.Count, Sort.RELEVANCE); var scoreDocs = topFieldDocs.ScoreDocs; foreach (var sd in scoreDocs) { var bestFragments = highlighter.GetBestFragments(fieldQuery, reader, sd.Doc, Schema.StandardField.FULL_TEXT, FRAGMENT_SIZE, FRAGMENT_COUNT); var document = searcher.Doc(sd.Doc); var docId = document.Get(Schema.StandardField.ID); if (documentHighlightMap.ContainsKey(docId) && bestFragments.Length > 0) { var dictionary = documentHighlightMap[docId].AsDictionary(); var highlight = String.Join($"{Environment.NewLine} ... {Environment.NewLine}", bestFragments); dictionary[HIGHLIGHT_FIELD_NAME] = highlight; } } }
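// CreateHighlighter() is not shown in the snippet above. A hedged sketch (not from the original
// source) of what it might return, assuming Lucene.Net's FastVectorHighlighter, whose
// GetFieldQuery and GetBestFragments(fieldQuery, reader, docId, field, fragSize, count) calls
// match the usage above; the constructor flag values are illustrative.
private static FastVectorHighlighter CreateHighlighter()
{
    // Assumed (phraseHighlight, fieldMatch) constructor: highlight whole phrases,
    // and allow terms from any query field to be highlighted.
    return new FastVectorHighlighter(true, false);
}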
public virtual void TestSlowCompositeReaderWrapper() { AssumeTrue("Test requires SortedSetDV support", DefaultCodecSupportsSortedSet()); Directory dir = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone); FacetsConfig config = new FacetsConfig(); Document doc = new Document(); doc.Add(new SortedSetDocValuesFacetField("a", "foo1")); writer.AddDocument(config.Build(doc)); writer.Commit(); doc = new Document(); doc.Add(new SortedSetDocValuesFacetField("a", "foo2")); writer.AddDocument(config.Build(doc)); // NRT open IndexSearcher searcher = new IndexSearcher(SlowCompositeReaderWrapper.Wrap(writer.Reader)); // Per-top-reader state: SortedSetDocValuesReaderState state = new DefaultSortedSetDocValuesReaderState(searcher.IndexReader); FacetsCollector c = new FacetsCollector(); searcher.Search(new MatchAllDocsQuery(), c); Facets facets = new SortedSetDocValuesFacetCounts(state, c); // Ask for top 10 labels for any dims that have counts: Assert.AreEqual("dim=a path=[] value=2 childCount=2\n foo1 (1)\n foo2 (1)\n", facets.GetTopChildren(10, "a").ToString()); IOUtils.Close(writer, searcher.IndexReader, dir); }
/// <summary> /// Searches the datasource using the specified criteria. Criteria is parsed by the query builder specified by /// <typeparamref /// name="QueryBuilderType" /> /// . /// </summary> /// <param name="scope">Name of the application.</param> /// <param name="criteria">The criteria.</param> /// <returns></returns> /// <exception cref="VirtoCommerce.Search.Providers.Lucene.LuceneSearchException"></exception> public virtual ISearchResults Search(string scope, ISearchCriteria criteria) { TopDocs docs = null; var folderName = this.GetFolderName(scope, criteria.DocumentType); var dir = FSDirectory.Open(new DirectoryInfo(this.GetDirectoryPath(folderName))); var searcher = new IndexSearcher(dir); var q = (Query)this.QueryBuilder.BuildQuery(criteria); Debug.WriteLine("Search Lucene Query:{0}", (object)q.ToString()); try { var numDocs = criteria.StartingRecord + criteria.RecordsToRetrieve; if (criteria.Sort != null) { var fields = criteria.Sort.GetSort(); docs = searcher.Search( q, null, numDocs, new Sort( fields.Select(field => new SortField(field.FieldName, field.DataType, field.IsDescending)) .ToArray())); } else { docs = searcher.Search(q, numDocs); } } catch (Exception ex) { throw new LuceneSearchException("Search exception", ex); } var results = new LuceneSearchResults(searcher, searcher.IndexReader, docs, criteria, q); // Cleanup here searcher.IndexReader.Dispose(); searcher.Dispose(); return results.Results; }
/// <summary> /// Search based on a previously built index /// </summary> private static ICollection <string> Search( int forumID, string searchText, bool searchInText, bool searchInSubject, bool searchAuthor, bool searchInMyMessages, bool searchAnyWords, DateTime from, DateTime to) { var result = new List <string>(); var query = new BooleanQuery(); var analyzer = new RussianAnalyzer(Version.LUCENE_30); var indexPath = GetIndexDir(); var searchTextExists = !string.IsNullOrEmpty(searchText); #region Query string processing // The search-language signature is ** if (searchTextExists) { if (searchText.StartsWith(_signature)) { // The query language is requested: strip the ** prefix and treat the rest of the string as written in the search language searchText = searchText.Substring(_signature.Length); } else { // Use simple search: escape special characters, split into tokens (space-separated), honor the searchAnyWords flag (AND/OR) // Order matters, \\ must come first var specChars = new[] { "\\", "+", "-", "&", "|", "!", "(", ")", "{", "}", "[", "]", "^", "\"", "~", "*", "?", ":" }; searchText = specChars .Aggregate( searchText, (current, specChar) => current.Replace(specChar, "\\" + specChar)); var token = searchText.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries); if (searchAnyWords) { searchText = string.Join(" ", token); } else { searchText = "+" + string.Join(" +", token); } } } #endregion if (forumID != -1) { query.Add( new TermQuery(new Term("gid", forumID.ToString())), Occur.MUST); } if (searchInMyMessages) { query.Add( new TermQuery(new Term("uid", Config.Instance.SelfId.ToString())), Occur.MUST); } //if (searchInQuestions) // bq.Add(new TermQuery(new Term("tid", "0")), true, false); if (from.Ticks != 0 || to.Ticks != 0) { var rq = new TermRangeQuery("dte", FormatDate(from), FormatDate(to), true, true); query.Add(rq, Occur.MUST); } if (searchTextExists) { var searchTextQuery = new BooleanQuery(); if (searchInText) { searchTextQuery.Add( new QueryParser(Version.LUCENE_29, "message", analyzer).Parse(searchText), Occur.SHOULD); } if (searchInSubject) { searchTextQuery.Add( new QueryParser(Version.LUCENE_29, "subject", analyzer).Parse(searchText), Occur.SHOULD); } if (searchAuthor) { searchTextQuery.Add( new QueryParser(Version.LUCENE_29, "usernick", analyzer).Parse(searchText), Occur.SHOULD); } query.Add(searchTextQuery, Occur.MUST); } var searcher = new IndexSearcher(indexPath, true); try { var topDocs = searcher.Search(query, _maxSearchReults); result .AddRange( topDocs .ScoreDocs .Select(scored => searcher.Doc(scored.Doc).Get("mid"))); } finally { searcher.Close(); } return(result); }
public virtual void TestHugeLabel() { Directory indexDir = NewDirectory(), taxoDir = NewDirectory(); IndexWriter indexWriter = new IndexWriter(indexDir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))); DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE, new Cl2oTaxonomyWriterCache(2, 1f, 1)); FacetsConfig config = new FacetsConfig(); // Add one huge label: string bigs = null; int ordinal = -1; int len = FacetLabel.MAX_CATEGORY_PATH_LENGTH - 4; // for the dimension and separator bigs = TestUtil.RandomSimpleString(Random(), len, len); FacetField ff = new FacetField("dim", bigs); FacetLabel cp = new FacetLabel("dim", bigs); ordinal = taxoWriter.AddCategory(cp); Document doc = new Document(); doc.Add(ff); indexWriter.AddDocument(config.Build(taxoWriter, doc)); // Add tiny ones to cause a re-hash for (int i = 0; i < 3; i++) { string s = TestUtil.RandomSimpleString(Random(), 1, 10); taxoWriter.AddCategory(new FacetLabel("dim", s)); doc = new Document(); doc.Add(new FacetField("dim", s)); indexWriter.AddDocument(config.Build(taxoWriter, doc)); } // when too large components were allowed to be added, this resulted in a new added category Assert.AreEqual(ordinal, taxoWriter.AddCategory(cp)); IOUtils.Close(indexWriter, taxoWriter); DirectoryReader indexReader = DirectoryReader.Open(indexDir); var taxoReader = new DirectoryTaxonomyReader(taxoDir); IndexSearcher searcher = new IndexSearcher(indexReader); DrillDownQuery ddq = new DrillDownQuery(new FacetsConfig()); ddq.Add("dim", bigs); Assert.AreEqual(1, searcher.Search(ddq, 10).TotalHits); IOUtils.Close(indexReader, taxoReader, indexDir, taxoDir); }
/// <summary> /// Searches the specified phrase in the specified search fields. /// </summary> /// <param name="searchFields">The search fields.</param> /// <param name="phrase">The phrase to search.</param> /// <param name="searchOption">The search options.</param> /// <returns>A list of <see cref="SearchResult"/> items.</returns> public static List <SearchResult> Search(SearchField[] searchFields, string phrase, SearchOptions searchOption) { IIndexDirectoryProviderV30 indexDirectoryProvider = Collectors.IndexDirectoryProvider; Analyzer analyzer = new SimpleAnalyzer(); using (IndexSearcher searcher = new IndexSearcher(indexDirectoryProvider.GetDirectory(), false)) { string[] searchFieldsAsString = (from f in searchFields select f.AsString()).ToArray(); MultiFieldQueryParser queryParser = new MultiFieldQueryParser(Lucene.Net.Util.Version.LUCENE_29, searchFieldsAsString, analyzer); if (searchOption == SearchOptions.AllWords) { queryParser.DefaultOperator = QueryParser.Operator.AND; } if (searchOption == SearchOptions.AtLeastOneWord) { queryParser.DefaultOperator = QueryParser.Operator.OR; } try { Query query = queryParser.Parse(phrase); TopDocs topDocs = searcher.Search(query, 100); Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter("<b class=\"searchkeyword\">", "</b>"), new QueryScorer(query)); List <SearchResult> searchResults = new List <SearchResult>(topDocs.TotalHits); for (int i = 0; i < Math.Min(100, topDocs.TotalHits); i++) { Document doc = searcher.Doc(topDocs.ScoreDocs[i].Doc); SearchResult result = new SearchResult(); result.DocumentType = DocumentTypeFromString(doc.GetField(SearchField.DocumentType.AsString()).StringValue); result.Relevance = topDocs.ScoreDocs[i].Score * 100; switch (result.DocumentType) { case DocumentType.Page: PageDocument page = new PageDocument(); page.PageFullName = doc.GetField(SearchField.PageFullName.AsString()).StringValue; page.Title = doc.GetField(SearchField.Title.AsString()).StringValue; TokenStream tokenStream1 = analyzer.TokenStream(SearchField.Title.AsString(), new StringReader(page.Title)); page.HighlightedTitle = highlighter.GetBestFragments(tokenStream1, page.Title, 3, " [...] "); page.Content = doc.GetField(SearchField.Content.AsString()).StringValue; tokenStream1 = analyzer.TokenStream(SearchField.Content.AsString(), new StringReader(page.Content)); page.HighlightedContent = highlighter.GetBestFragments(tokenStream1, page.Content, 3, " [...] "); result.Document = page; break; case DocumentType.Message: MessageDocument message = new MessageDocument(); message.PageFullName = doc.GetField(SearchField.PageFullName.AsString()).StringValue; message.DateTime = DateTime.Parse(doc.GetField(SearchField.MessageDateTime.AsString()).StringValue); message.Subject = doc.GetField(SearchField.Title.AsString()).StringValue; message.Body = doc.GetField(SearchField.Content.AsString()).StringValue; TokenStream tokenStream2 = analyzer.TokenStream(SearchField.Content.AsString(), new StringReader(message.Body)); message.HighlightedBody = highlighter.GetBestFragments(tokenStream2, message.Body, 3, " [...] 
"); result.Document = message; break; case DocumentType.Attachment: PageAttachmentDocument attachment = new PageAttachmentDocument(); attachment.PageFullName = doc.GetField(SearchField.PageFullName.AsString()).StringValue; attachment.FileName = doc.GetField(SearchField.FileName.AsString()).StringValue; attachment.FileContent = doc.GetField(SearchField.FileContent.AsString()).StringValue; TokenStream tokenStream3 = analyzer.TokenStream(SearchField.Content.AsString(), new StringReader(attachment.FileContent)); attachment.HighlightedFileContent = highlighter.GetBestFragments(tokenStream3, attachment.FileContent, 3, " [...] "); result.Document = attachment; break; case DocumentType.File: FileDocument file = new FileDocument(); file.FileName = doc.GetField(SearchField.FileName.AsString()).StringValue; file.FileContent = doc.GetField(SearchField.FileContent.AsString()).StringValue; TokenStream tokenStream4 = analyzer.TokenStream(SearchField.Content.AsString(), new StringReader(file.FileContent)); file.HighlightedFileContent = highlighter.GetBestFragments(tokenStream4, file.FileContent, 3, " [...]"); result.Document = file; break; case DocumentType.SourceControlFile: FileDocument scfile = new FileDocument(); scfile.FileName = doc.GetField(SearchField.FileName.AsString()).StringValue; scfile.FileContent = doc.GetField(SearchField.FileContent.AsString()).StringValue; TokenStream tokenStream5 = analyzer.TokenStream(SearchField.Content.AsString(), new StringReader(scfile.FileContent)); scfile.HighlightedFileContent = highlighter.GetBestFragments(tokenStream5, scfile.FileContent, 3, " [...]"); result.Document = scfile; break; } searchResults.Add(result); } return(searchResults); } catch (ParseException) { return(new List <SearchResult>(0)); } } }
public virtual void TestFarsiRangeFilterCollating(Analyzer analyzer, BytesRef firstBeg, BytesRef firstEnd, BytesRef secondBeg, BytesRef secondEnd) { Directory dir = NewDirectory(); IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, analyzer)); Document doc = new Document(); doc.Add(new TextField("content", "\u0633\u0627\u0628", Field.Store.YES)); doc.Add(new StringField("body", "body", Field.Store.YES)); writer.AddDocument(doc); writer.Dispose(); IndexReader reader = DirectoryReader.Open(dir); IndexSearcher searcher = new IndexSearcher(reader); Query query = new TermQuery(new Term("body", "body")); // Unicode order would include U+0633 in [ U+062F - U+0698 ], but Farsi // orders the U+0698 character before the U+0633 character, so the single // index Term below should NOT be returned by a TermRangeFilter with a Farsi // Collator (or an Arabic one for the case when a Farsi collator is not // supported). ScoreDoc[] result = searcher.Search(query, new TermRangeFilter("content", firstBeg, firstEnd, true, true), 1).ScoreDocs; Assert.AreEqual(0, result.Length, "The index Term should not be included."); result = searcher.Search(query, new TermRangeFilter("content", secondBeg, secondEnd, true, true), 1).ScoreDocs; Assert.AreEqual(1, result.Length, "The index Term should be included."); reader.Dispose(); dir.Dispose(); }
public IEnumerable <(Document Result, Dictionary <string, Dictionary <string, string[]> > Highlightings, ExplanationResult Explanation)> IntersectQuery(IndexQueryServerSide query, FieldsToFetch fieldsToFetch, Reference <int> totalResults, Reference <int> skippedResults, IQueryResultRetriever retriever, DocumentsOperationContext documentsContext, Func <string, SpatialField> getSpatialField, CancellationToken token) { var method = query.Metadata.Query.Where as MethodExpression; if (method == null) { throw new InvalidQueryException($"Invalid intersect query. WHERE clause must contains just an intersect() method call while it got {query.Metadata.Query.Where.Type} expression", query.Metadata.QueryText, query.QueryParameters); } var methodName = method.Name; if (string.Equals("intersect", methodName) == false) { throw new InvalidQueryException($"Invalid intersect query. WHERE clause must contains just a single intersect() method call while it got '{methodName}' method", query.Metadata.QueryText, query.QueryParameters); } if (method.Arguments.Count <= 1) { throw new InvalidQueryException("The valid intersect query must have multiple intersect clauses.", query.Metadata.QueryText, query.QueryParameters); } var subQueries = new Query[method.Arguments.Count]; for (var i = 0; i < subQueries.Length; i++) { var whereExpression = method.Arguments[i] as QueryExpression; if (whereExpression == null) { throw new InvalidQueryException($"Invalid intersect query. The intersect clause at position {i} isn't a valid expression", query.Metadata.QueryText, query.QueryParameters); } subQueries[i] = GetLuceneQuery(documentsContext, query.Metadata, whereExpression, query.QueryParameters, _analyzer, _queryBuilderFactories); } //Not sure how to select the page size here??? The problem is that only docs in this search can be part //of the final result because we're doing an intersection query (but we might exclude some of them) var pageSize = GetPageSize(_searcher, query.PageSize); int pageSizeBestGuess = GetPageSize(_searcher, ((long)query.Start + query.PageSize) * 2); int skippedResultsInCurrentLoop = 0; int previousBaseQueryMatches = 0; var firstSubDocumentQuery = subQueries[0]; var sort = GetSort(query, _index, getSpatialField, documentsContext); using (var scope = new IndexQueryingScope(_indexType, query, fieldsToFetch, _searcher, retriever, _state)) { //Do the first sub-query in the normal way, so that sorting, filtering etc is accounted for var search = ExecuteQuery(firstSubDocumentQuery, 0, pageSizeBestGuess, sort); var currentBaseQueryMatches = search.ScoreDocs.Length; var intersectionCollector = new IntersectionCollector(_searcher, search.ScoreDocs, _state); int intersectMatches; do { token.ThrowIfCancellationRequested(); if (skippedResultsInCurrentLoop > 0) { // We get here because out first attempt didn't get enough docs (after INTERSECTION was calculated) pageSizeBestGuess = pageSizeBestGuess * 2; search = ExecuteQuery(firstSubDocumentQuery, 0, pageSizeBestGuess, sort); previousBaseQueryMatches = currentBaseQueryMatches; currentBaseQueryMatches = search.ScoreDocs.Length; intersectionCollector = new IntersectionCollector(_searcher, search.ScoreDocs, _state); } for (var i = 1; i < subQueries.Length; i++) { _searcher.Search(subQueries[i], null, intersectionCollector, _state); } var currentIntersectResults = intersectionCollector.DocumentsIdsForCount(subQueries.Length).ToList(); intersectMatches = currentIntersectResults.Count; skippedResultsInCurrentLoop = pageSizeBestGuess - intersectMatches; } while 
(intersectMatches < pageSize && //stop if we've got enough results to satisfy the pageSize currentBaseQueryMatches < search.TotalHits && //stop if increasing the page size wouldn't make any difference previousBaseQueryMatches < currentBaseQueryMatches); //stop if increasing the page size didn't result in any more "base query" results var intersectResults = intersectionCollector.DocumentsIdsForCount(subQueries.Length).ToList(); //It's hard to know what to do here, the TotalHits from the base search isn't really the TotalSize, //because it's before the INTERSECTION has been applied, so only some of those results make it out. //Trying to give an accurate answer is going to be too costly, so we aren't going to try. totalResults.Value = search.TotalHits; skippedResults.Value = skippedResultsInCurrentLoop; //Using the final set of results in the intersectionCollector int returnedResults = 0; for (int i = query.Start; i < intersectResults.Count && (i - query.Start) < pageSizeBestGuess; i++) { var indexResult = intersectResults[i]; var document = _searcher.Doc(indexResult.LuceneId, _state); if (retriever.TryGetKey(document, _state, out string key) && scope.WillProbablyIncludeInResults(key) == false) { skippedResults.Value++; skippedResultsInCurrentLoop++; continue; } var result = retriever.Get(document, indexResult.Score, _state); if (scope.TryIncludeInResults(result) == false) { skippedResults.Value++; skippedResultsInCurrentLoop++; continue; } returnedResults++; yield return(result, null, null); if (returnedResults == pageSize) { yield break; } } } }
// LUCENE-1404 private int HitCount(IndexSearcher searcher, string word) { return searcher.Search(new TermQuery(new Term("text", word)), 10).TotalHits; }
static void Main(string[] args) { var options = new DirectoryExternalStorageOptions("indexing"); var input = File.OpenRead(@"C:\Users\Ayende\Downloads\Crimes_-_2001_to_present.csv"); //var sorter = new ExternalSorter(input, options, new int[] //{ // 1,// case number // 4, // ICHR //}); //var sp = Stopwatch.StartNew(); //sorter.Sort(); //Console.WriteLine(sp.Elapsed); //var r = new SourceReader( // File.OpenRead(@"C:\work\ExternalSorting\ExternalSorting.Tryouts\bin\Debug\indexing\0.index"), // Encoding.UTF8, new[] {0, 1}); //r.SetPositionToLineAt(1000); //var result = r.ReadFromStream().First(); //var prev = new IndexEntry //{ // Value = new ArraySegment<char>(new char[0]) //}; //int entries = 0; //for (int i = 0; i < r.NumberOfPages; i++) //{ // while (true) // { // var entry = r.Read(); // if (entry == null) // break; // entries++; // var match = Utils.CompareIndexEntries(prev, entry); // Console.WriteLine(new string(prev.Value.Array, prev.Value.Offset, prev.Value.Count) + " - " + new string(entry.Value.Array, entry.Value.Offset, entry.Value.Count) + " = " + match); // var array = new char[entry.Value.Count]; // Array.Copy(entry.Value.Array, entry.Value.Offset, array, 0, entry.Value.Count); // prev.Value = new ArraySegment<char>(array); // } //} //Console.WriteLine(); //Console.WriteLine(entries); var searcher = new IndexSearcher(input, File.OpenRead(@"C:\work\ExternalSorting\ExternalSorting.Tryouts\bin\Debug\indexing\0.index"), Encoding.UTF8); for (int i = 0; i < 10; i++) { var sp = Stopwatch.StartNew(); foreach (var line in searcher.Search(@"HT574031")) { Console.WriteLine(line); } Console.WriteLine(sp.Elapsed); } }