Beispiel #1
0
        /// <summary>
        /// Runs a <see cref="MatchAllDocsQuery"/> against the current searcher and
        /// reports the total number of hits.
        /// </summary>
        /// <returns>
        /// The total hit count of the match-all query, or 0 when no searcher
        /// (and therefore no result) is available.
        /// </returns>
        public int DocumentCount()
        {
            EnsureSearcher();

            // Only the hit total matters, so a single top document is requested.
            var topDocs = Searcher?.Search(new MatchAllDocsQuery(), 1);
            if (topDocs == null)
            {
                return 0;
            }

            return topDocs.TotalHits;
        }
Beispiel #2
0
        /// <summary>
        /// Counts the hits of a <see cref="MatchAllDocsQuery"/> with a
        /// <see cref="TotalHitCountCollector"/>, so no documents are scored or collected.
        /// </summary>
        /// <returns>
        /// The collector's total hit count; this is the collector's initial value
        /// when no searcher is available and the search is skipped.
        /// </returns>
        public int DocumentCount()
        {
            EnsureSearcher();

            var hitCounter = new TotalHitCountCollector();
            Searcher?.Search(new MatchAllDocsQuery(), hitCounter);

            return hitCounter.TotalHits;
        }
Beispiel #3
0
  String q = args[1];            // B

  /// <summary>
  /// Parses <paramref name="q"/> against the "contents" field, runs it on the
  /// index located at <paramref name="indexDir"/>, and prints the hit total plus
  /// the "filename" field of the (at most ten) returned documents.
  /// </summary>
  /// <param name="indexDir">Path of the index to open.</param>
  /// <param name="q">Query text handed to the query parser.</param>
  public static void search(String indexDir, String q) {
    Directory dir = FSDirectory.Open(new System.IO.FileInfo(indexDir));
    IndexSearcher searcher = new IndexSearcher(dir, true);
    try {
      QueryParser parser = new QueryParser("contents",
                                           new StandardAnalyzer(Version.LUCENE_CURRENT));
      Query query = parser.Parse(q);

      // Ask for the ten best hits only; totalHits still reports every match.
      Lucene.Net.Search.TopDocs hits = searcher.Search(query, 10);
      System.Console.WriteLine("Found " +
                               hits.totalHits +
                               " document(s) that matched query '" + q + "':");

      // NOTE(review): member casing is mixed (scoreDocs / ScoreDocs, doc) exactly as
      // in the original; confirm against the Lucene.NET version this targets.
      for (int i = 0; i < hits.scoreDocs.Length; i++) {
        ScoreDoc scoreDoc = hits.ScoreDocs[i];
        Document doc = searcher.Doc(scoreDoc.doc);
        System.Console.WriteLine(doc.Get("filename"));
      }
    } finally {
      // Close the searcher even when parsing or searching throws.
      searcher.Close();
    }
  }
Beispiel #4
0
        /// <summary>
        /// Searches the Lucene index as described by <paramref name="searchConfiguration"/>.
        /// Returns <c>null</c> when the <see cref="LuceneSearcherConfiguration"/> fails
        /// <c>CheckSearchConfiguration</c>.
        /// </summary>
        /// <param name="searchConfiguration">
        /// Search settings; the members read here are <c>ESearchMethod</c>, <c>Fields</c>,
        /// <c>Query</c>, <c>WantAll</c>, <c>All</c> and <c>MaxResult</c>.
        /// </param>
        /// <returns>
        /// The matching log view models, or <c>null</c> when the configuration is rejected.
        /// </returns>
        public List <ILogViewModel> Search(LuceneSearcherConfiguration searchConfiguration)
        {
            if (!CheckSearchConfiguration(searchConfiguration))
            {
                return null;
            }

            if (searchConfiguration.ESearchMethod == ESearchMethod.FullText)
            {
                // The analyzer is IDisposable; scope it so it is released once the
                // full-text search has completed instead of leaking one per call.
                using (var analyzer = new StandardAnalyzer(LuceneVersion.LUCENE_48))
                {
                    var parser = new MultiFieldQueryParser(
                        LuceneVersion.LUCENE_48,
                        searchConfiguration.Fields,
                        analyzer);

                    return Search(searchConfiguration, parser.Parse(searchConfiguration.Query));
                }
            }

            if (searchConfiguration.WantAll)
            {
                return Search(searchConfiguration, GetAll(searchConfiguration.All));
            }

            // A missing index searcher yields a null result set that is handed to CreateLogsResult.
            return CreateLogsResult(
                _indexSearcher?.Search(CreateQuery(searchConfiguration), searchConfiguration.MaxResult, _sort));
        }
Beispiel #5
0
    /// <summary>
    /// Exercises lazy stored-field loading through <see cref="LazyDocument"/>:
    /// picks a random document by its "docid" term, checks that the requested
    /// fields start out as unloaded lazy fields, that reading one value loads
    /// every value of that field only, and that requesting "load_later"
    /// afterwards keeps the expected loaded/unloaded state. "never_load" must
    /// never appear, not even in the wrapped document.
    /// </summary>
    public void TestLazy()
    {
        int targetId = Random.nextInt(NUM_DOCS);

        using (IndexReader reader = DirectoryReader.Open(dir))
        {
            Query         byDocId  = new TermQuery(new Term("docid", "" + targetId));
            IndexSearcher searcher = NewSearcher(reader);
            ScoreDoc[]    matches  = searcher.Search(byDocId, 100).ScoreDocs;
            assertEquals("Too many docs", 1, matches.Length);

            int hitDoc = matches[0].Doc;
            LazyTestingStoredFieldVisitor visitor =
                new LazyTestingStoredFieldVisitor(new LazyDocument(reader, hitDoc), FIELDS);
            reader.Document(hitDoc, visitor);
            Document loaded = visitor.doc;

            int totalValues = 0;
            IDictionary <string, int> perFieldCounts = new JCG.Dictionary <string, int>();

            // Pass 1: every requested field must be lazy and not yet realized.
            foreach (IIndexableField field in loaded)
            {
                totalValues++;
                string name = field.Name;

                if (name.Equals("never_load", StringComparison.Ordinal))
                {
                    fail("never_load was loaded");
                }
                if (name.Equals("load_later", StringComparison.Ordinal))
                {
                    fail("load_later was loaded on first pass");
                }

                if (name.Equals("docid", StringComparison.Ordinal))
                {
                    assertFalse(name, field is LazyDocument.LazyField);
                    continue;
                }

                // A missing key leaves 'seen' at 0, so the first occurrence stores 1.
                perFieldCounts.TryGetValue(name, out int seen);
                perFieldCounts[name] = seen + 1;

                assertTrue(name + " is " + field.GetType(),
                           field is LazyDocument.LazyField);
                LazyDocument.LazyField lazy = (LazyDocument.LazyField)field;
                assertFalse(name + " is loaded", lazy.HasBeenLoaded);
            }
            Console.WriteLine("numFieldValues == " + totalValues);
            assertEquals("numFieldValues", 1 + (NUM_VALUES * FIELDS.Length),
                         totalValues);

            foreach (KeyValuePair <string, int> entry in perFieldCounts)
            {
                assertEquals("fieldName count: " + entry.Key,
                             NUM_VALUES, entry.Value);
            }

            // Realize exactly one value of one randomly chosen field.
            string            fieldName = FIELDS[Random.nextInt(FIELDS.Length)];
            IIndexableField[] values    = loaded.GetFields(fieldName);
            assertEquals("#vals in field: " + fieldName,
                         NUM_VALUES, values.Length);
            int valNum = Random.nextInt(values.Length);
            assertEquals(targetId + "_" + fieldName + "_" + valNum,
                         values[valNum].GetStringValue());

            // Pass 2: all values of the chosen field are loaded now, nothing else is.
            foreach (IIndexableField field in loaded)
            {
                string name = field.Name;

                if (name.Equals("never_load", StringComparison.Ordinal))
                {
                    fail("never_load was loaded");
                }
                if (name.Equals("load_later", StringComparison.Ordinal))
                {
                    fail("load_later was loaded too soon");
                }

                if (name.Equals("docid", StringComparison.Ordinal))
                {
                    assertFalse(name, field is LazyDocument.LazyField);
                    continue;
                }

                assertTrue(name + " is " + field.GetType(),
                           field is LazyDocument.LazyField);
                LazyDocument.LazyField lazy = (LazyDocument.LazyField)field;
                assertEquals(name + " is loaded?",
                             lazy.Name.Equals(fieldName, StringComparison.Ordinal), lazy.HasBeenLoaded);
            }

            // Ask for one more lazy field through a fresh visitor.
            visitor = new LazyTestingStoredFieldVisitor(new LazyDocument(reader, hitDoc),
                                                        "load_later");
            reader.Document(hitDoc, visitor);
            loaded = visitor.doc;

            // Pass 3: the expected values are present, and only the chosen field
            // reports itself as loaded.
            foreach (IIndexableField field in loaded)
            {
                string name = field.Name;

                if (name.Equals("never_load", StringComparison.Ordinal))
                {
                    fail("never_load was loaded");
                }

                if (name.Equals("docid", StringComparison.Ordinal))
                {
                    assertFalse(name, field is LazyDocument.LazyField);
                    continue;
                }

                assertTrue(name + " is " + field.GetType(),
                           field is LazyDocument.LazyField);
                LazyDocument.LazyField lazy = (LazyDocument.LazyField)field;
                assertEquals(name + " is loaded?",
                             lazy.Name.Equals(fieldName, StringComparison.Ordinal), lazy.HasBeenLoaded);
            }

            // Not even the wrapped document may contain never_load.
            assertNull("never_load was loaded in wrapped doc",
                       visitor.lazyDoc.GetDocument().GetField("never_load"));
        }
    }
Beispiel #6
0
 /// <summary>
 /// Returns the total hit count of a term query for <paramref name="word"/>
 /// on the "text" field (helper for LUCENE-1404).
 /// </summary>
 /// <param name="searcher">Searcher to run the query on.</param>
 /// <param name="word">Term text to look up.</param>
 /// <returns>The total number of matching documents reported by the search.</returns>
 private int HitCount(IndexSearcher searcher, string word)
 {
     var termQuery = new TermQuery(new Term("text", word));
     var topDocs   = searcher.Search(termQuery, 10);

     return topDocs.TotalHits;
 }
Beispiel #7
0
        /// <summary>
        /// Runs <paramref name="query"/> and converts the hits from index
        /// <paramref name="lowerBoundary"/> onwards into <see cref="SearchDoc"/> items whose
        /// <c>Description</c> holds the highlighted fragments of the selected fields.
        /// </summary>
        /// <param name="applicationId">Application id passed to the error log on failure.</param>
        /// <param name="lowerBoundary">Index of the first hit to convert.</param>
        /// <param name="count">Requested page size; <c>lowerBoundary + count + count / 2</c> hits are fetched.</param>
        /// <param name="query">Query to execute and to highlight with (not reassigned here).</param>
        /// <param name="searcher">Searcher to execute the query on (not reassigned here).</param>
        /// <param name="additionalId">Highlight the "AdditionalID" field.</param>
        /// <param name="title">Highlight the "Title" field.</param>
        /// <param name="description">Highlight the "Description" field.</param>
        /// <param name="content">Highlight the "Content" field.</param>
        /// <param name="tags">Highlight the "Tags" field.</param>
        /// <param name="fileContent">Highlight the "FileContent" field.</param>
        /// <returns>
        /// The converted hits; an empty list when nothing qualifies or when an exception
        /// was caught and logged.
        /// </returns>
        private static List <SearchDoc> lucene_search(Guid applicationId, int lowerBoundary, int count, ref Query query,
                                                      ref IndexSearcher searcher, bool additionalId, bool title, bool description, bool content, bool tags, bool fileContent)
        {
            try
            {
                List <SearchDoc> listDocs = new List <SearchDoc>();

                TopDocs hits = searcher.Search(query, lowerBoundary + count + (count / 2));
                int hitCount = hits.ScoreDocs.Length;

                // Nothing in the requested window: skip all highlighter setup.
                if (lowerBoundary >= hitCount)
                {
                    return listDocs;
                }

                FastVectorHighlighter fvHighlighter = new FastVectorHighlighter(true, true);

                // The field query and the reader do not depend on the hit, so build them
                // once instead of up to six times per document.
                var fieldQuery  = fvHighlighter.GetFieldQuery(query);
                var indexReader = searcher.IndexReader;

                for (int i = lowerBoundary; i < hitCount; ++i)
                {
                    ScoreDoc sd = hits.ScoreDocs[i];

                    string addIdFr = !additionalId ? string.Empty :
                                     fvHighlighter.GetBestFragment(fieldQuery, indexReader, docId: sd.Doc, fieldName: "AdditionalID", fragCharSize: 200);
                    string titleFr = !title ? string.Empty :
                                     fvHighlighter.GetBestFragment(fieldQuery, indexReader, docId: sd.Doc, fieldName: "Title", fragCharSize: 200);
                    string descFr = !description ? string.Empty :
                                    fvHighlighter.GetBestFragment(fieldQuery, indexReader, docId: sd.Doc, fieldName: "Description", fragCharSize: 200);
                    string contentFr = !content ? string.Empty :
                                       fvHighlighter.GetBestFragment(fieldQuery, indexReader, docId: sd.Doc, fieldName: "Content", fragCharSize: 200);
                    string tagsFr = !tags ? string.Empty :
                                    fvHighlighter.GetBestFragment(fieldQuery, indexReader, docId: sd.Doc, fieldName: "Tags", fragCharSize: 200);
                    string fileFr = !fileContent ? string.Empty :
                                    fvHighlighter.GetBestFragment(fieldQuery, indexReader, docId: sd.Doc, fieldName: "FileContent", fragCharSize: 200);

                    if (!string.IsNullOrEmpty(titleFr))
                    {
                        titleFr = titleFr.Trim();
                    }
                    if (!string.IsNullOrEmpty(addIdFr))
                    {
                        addIdFr = addIdFr.Trim();
                    }

                    // Description, content, tags and file fragments are joined with single
                    // spaces; empty or null fragments are skipped.
                    string highlightedText = ((string.IsNullOrEmpty(descFr) ? string.Empty : descFr + " ") +
                                              (string.IsNullOrEmpty(contentFr) ? string.Empty : contentFr + " ") +
                                              (string.IsNullOrEmpty(tagsFr) ? string.Empty : tagsFr + " ") +
                                              (string.IsNullOrEmpty(fileFr) ? string.Empty : fileFr)).Trim();

                    // NOTE(review): the first hit without any fragment ends the whole scan, so
                    // later hits are dropped as well - confirm 'break' (not 'continue') is intended.
                    if (string.IsNullOrEmpty(addIdFr) && string.IsNullOrEmpty(titleFr) && string.IsNullOrEmpty(highlightedText))
                    {
                        break;
                    }

                    Document  doc  = searcher.Doc(sd.Doc);
                    SearchDoc item = SearchDoc.ToSearchDoc(doc);
                    item.Description = highlightedText;
                    listDocs.Add(item);
                }

                return listDocs;
            }
            catch (Exception ex)
            {
                // Best effort: log the failure and hand back an empty result.
                LogController.save_error_log(applicationId, null, "SearchIndexDocuments", ex, ModuleIdentifier.SRCH);
                return new List <SearchDoc>();
            }
        }
 /// <summary>
 /// Runs a top-10 term query for every entry of <c>SampleTerms</c> and asserts
 /// that each one reports at least one hit.
 /// </summary>
 public virtual void TestRandomSearchPerformance()
 {
     var searcher = new IndexSearcher(Reader);

     foreach (Term term in SampleTerms)
     {
         var totalHits = searcher.Search(new TermQuery(term), 10).TotalHits;
         Assert.IsTrue(totalHits > 0);
     }
 }
        /// <summary>
        /// Indexes seven small documents with a group field and a count field, then runs
        /// the two-pass grouping collectors for the terms "random", "some" and "blob" on
        /// the "content" field and checks, per group, the distinct count-field values.
        /// The doc-values type is chosen randomly unless the codec is "Lucene3x".
        /// </summary>
        public virtual void TestSimple()
        {
            Random random = Random;

            DocValuesType[] dvTypes = new DocValuesType[] {
                DocValuesType.NUMERIC,
                DocValuesType.BINARY,
                DocValuesType.SORTED,
            };
            Directory         dir = NewDirectory();
            RandomIndexWriter w   = new RandomIndexWriter(
                random,
                dir,
                NewIndexWriterConfig(TEST_VERSION_CURRENT,
                                     new MockAnalyzer(random)).SetMergePolicy(NewLogMergePolicy()));
            // The "Lucene3x" codec is treated as having no doc-values support: fall back to NONE.
            bool          canUseDV = !"Lucene3x".Equals(w.IndexWriter.Config.Codec.Name, StringComparison.Ordinal);
            DocValuesType dvType   = canUseDV ? dvTypes[random.nextInt(dvTypes.Length)] : DocValuesType.NONE;

            // doc 0: group "1", count "1"
            Document doc = new Document();

            AddField(doc, groupField, "1", dvType);
            AddField(doc, countField, "1", dvType);
            doc.Add(new TextField("content", "random text", Field.Store.NO));
            doc.Add(new StringField("id", "1", Field.Store.NO));
            w.AddDocument(doc);

            // doc 1: group "1", count "1"
            doc = new Document();
            AddField(doc, groupField, "1", dvType);
            AddField(doc, countField, "1", dvType);
            doc.Add(new TextField("content", "some more random text blob", Field.Store.NO));
            doc.Add(new StringField("id", "2", Field.Store.NO));
            w.AddDocument(doc);

            // doc 2: group "1", count "2"
            doc = new Document();
            AddField(doc, groupField, "1", dvType);
            AddField(doc, countField, "2", dvType);
            doc.Add(new TextField("content", "some more random textual data", Field.Store.NO));
            doc.Add(new StringField("id", "3", Field.Store.NO));
            w.AddDocument(doc);
            w.Commit(); // To ensure a second segment

            // doc 3: group "2", no count field
            doc = new Document();
            AddField(doc, groupField, "2", dvType);
            doc.Add(new TextField("content", "some random text", Field.Store.NO));
            doc.Add(new StringField("id", "4", Field.Store.NO));
            w.AddDocument(doc);

            // doc 4: group "3", count "1"
            doc = new Document();
            AddField(doc, groupField, "3", dvType);
            AddField(doc, countField, "1", dvType);
            doc.Add(new TextField("content", "some more random text", Field.Store.NO));
            doc.Add(new StringField("id", "5", Field.Store.NO));
            w.AddDocument(doc);

            // doc 5: group "3", count "1"
            doc = new Document();
            AddField(doc, groupField, "3", dvType);
            AddField(doc, countField, "1", dvType);
            doc.Add(new TextField("content", "random blob", Field.Store.NO));
            doc.Add(new StringField("id", "6", Field.Store.NO));
            w.AddDocument(doc);

            // doc 6: no group field, count "1"
            // NOTE(review): reuses id "6" from the previous document - confirm this is intentional.
            doc = new Document();
            doc.Add(new TextField("content", "random word stuck in alot of other text", Field.Store.YES));
            AddField(doc, countField, "1", dvType);
            doc.Add(new StringField("id", "6", Field.Store.NO));
            w.AddDocument(doc);

            IndexSearcher indexSearcher = NewSearcher(w.GetReader());

            w.Dispose();

            // Orders group counts by group value, with null group values sorted first.
            var cmp = Comparer <AbstractDistinctValuesCollector.IGroupCount <IComparable> > .Create((groupCount1, groupCount2) => {
                if (groupCount1.GroupValue == null)
                {
                    if (groupCount2.GroupValue == null)
                    {
                        return(0);
                    }
                    return(-1);
                }
                else if (groupCount2.GroupValue == null)
                {
                    return(1);
                }
                else
                {
                    return(groupCount1.GroupValue.CompareTo(groupCount2.GroupValue));
                }
            });

            // === Search for content:random
            // First pass collects the top groups; the second pass (built from the first
            // collector) gathers the distinct count-field values per group.
            IAbstractFirstPassGroupingCollector <IComparable> firstCollector = CreateRandomFirstPassCollector(dvType, new Sort(), groupField, 10);

            indexSearcher.Search(new TermQuery(new Term("content", "random")), firstCollector);
            IAbstractDistinctValuesCollector <AbstractDistinctValuesCollector.IGroupCount <IComparable> > distinctValuesCollector
                = CreateDistinctCountCollector(firstCollector, groupField, countField, dvType);

            indexSearcher.Search(new TermQuery(new Term("content", "random")), distinctValuesCollector);

            // LUCENENET TODO: Try to work out how to do this without an O(n) operation
            // (the groups are copied into a list so they can be sorted with cmp).
            var gcs = new List <AbstractDistinctValuesCollector.IGroupCount <IComparable> >(distinctValuesCollector.Groups);

            gcs.Sort(cmp);
            // Expected after sorting: the null group, then "1", "2" and "3".
            assertEquals(4, gcs.Count);

            CompareNull(gcs[0].GroupValue);
            List <IComparable> countValues = new List <IComparable>(gcs[0].UniqueValues);

            assertEquals(1, countValues.size());
            Compare("1", countValues[0]);

            Compare("1", gcs[1].GroupValue);
            countValues = new List <IComparable>(gcs[1].UniqueValues);
            // Sort so the two distinct values can be checked in a fixed order.
            countValues.Sort(nullComparer);
            assertEquals(2, countValues.size());
            Compare("1", countValues[0]);
            Compare("2", countValues[1]);

            // Group "2" has no count field, so its single unique value is the null value.
            Compare("2", gcs[2].GroupValue);
            countValues = new List <IComparable>(gcs[2].UniqueValues);
            assertEquals(1, countValues.size());
            CompareNull(countValues[0]);

            Compare("3", gcs[3].GroupValue);
            countValues = new List <IComparable>(gcs[3].UniqueValues);
            assertEquals(1, countValues.size());
            Compare("1", countValues[0]);

            // === Search for content:some
            firstCollector = CreateRandomFirstPassCollector(dvType, new Sort(), groupField, 10);
            indexSearcher.Search(new TermQuery(new Term("content", "some")), firstCollector);
            distinctValuesCollector = CreateDistinctCountCollector(firstCollector, groupField, countField, dvType);
            indexSearcher.Search(new TermQuery(new Term("content", "some")), distinctValuesCollector);

            // LUCENENET TODO: Try to work out how to do this without an O(n) operation
            gcs = new List <AbstractDistinctValuesCollector.IGroupCount <IComparable> >(distinctValuesCollector.Groups);
            gcs.Sort(cmp);
            // Expected groups: "1", "2" and "3" (no null group for this term).
            assertEquals(3, gcs.Count);

            Compare("1", gcs[0].GroupValue);
            countValues = new List <IComparable>(gcs[0].UniqueValues);
            assertEquals(2, countValues.size());
            countValues.Sort(nullComparer);
            Compare("1", countValues[0]);
            Compare("2", countValues[1]);

            Compare("2", gcs[1].GroupValue);
            countValues = new List <IComparable>(gcs[1].UniqueValues);
            assertEquals(1, countValues.size());
            CompareNull(countValues[0]);

            Compare("3", gcs[2].GroupValue);
            countValues = new List <IComparable>(gcs[2].UniqueValues);
            assertEquals(1, countValues.size());
            Compare("1", countValues[0]);

            // === Search for content:blob
            firstCollector = CreateRandomFirstPassCollector(dvType, new Sort(), groupField, 10);
            indexSearcher.Search(new TermQuery(new Term("content", "blob")), firstCollector);
            distinctValuesCollector = CreateDistinctCountCollector(firstCollector, groupField, countField, dvType);
            indexSearcher.Search(new TermQuery(new Term("content", "blob")), distinctValuesCollector);

            // LUCENENET TODO: Try to work out how to do this without an O(n) operation
            gcs = new List <AbstractDistinctValuesCollector.IGroupCount <IComparable> >(distinctValuesCollector.Groups);
            gcs.Sort(cmp);
            // Expected groups: "1" and "3".
            assertEquals(2, gcs.Count);

            Compare("1", gcs[0].GroupValue);
            countValues = new List <IComparable>(gcs[0].UniqueValues);
            // Only one document in group "1" contains "blob", so there is a single value.
            assertEquals(1, countValues.Count);
            Compare("1", countValues[0]);

            Compare("3", gcs[1].GroupValue);
            countValues = new List <IComparable>(gcs[1].UniqueValues);
            assertEquals(1, countValues.Count);
            Compare("1", countValues[0]);

            // Release the reader behind the searcher, then the directory.
            indexSearcher.IndexReader.Dispose();
            dir.Dispose();
        }
        /// <summary>
        /// Runs a numeric range query (2..6 inclusive on <c>NUMERIC_FIELD_NAME</c>) through
        /// the highlighter. Numeric ranges are not highlighted at the moment; the test only
        /// makes sure that attempting to highlight them does not throw.
        /// </summary>
        public void TestNumericRangeQuery()
        {
            query = NumericRangeQuery.NewIntRange(NUMERIC_FIELD_NAME, 2, 6, true, true);
            searcher = new IndexSearcher(ramDir, true);
            hits = searcher.Search(query, 100);
            int maxNumFragmentsRequired = 2;

            QueryScorer scorer = new QueryScorer(query, FIELD_NAME);
            Highlighter highlighter = new Highlighter(this, scorer);

            // Iterate over the returned documents, not TotalHits: ScoreDocs holds at most
            // the 100 requested hits while TotalHits counts every match.
            for (int i = 0; i < hits.ScoreDocs.Length; i++)
            {
                String text = searcher.Doc(hits.ScoreDocs[i].Doc).Get(NUMERIC_FIELD_NAME);
                TokenStream tokenStream = analyzer.TokenStream(FIELD_NAME, new StringReader(text));

                highlighter.TextFragmenter = new SimpleFragmenter(40);

                // The fragments are irrelevant here; the call merely has to complete.
                highlighter.GetBestFragments(tokenStream, text, maxNumFragmentsRequired,
                                             "...");
            }
        }
        /// <summary>
        /// Highlights the hits of the regex query "ken.*" on <c>FIELD_NAME</c>, prints the
        /// best fragments and expects exactly five highlights to have been counted in
        /// <c>numHighlights</c>.
        /// </summary>
        public void TestRegexQuery()
        {
            const int maxNumFragmentsRequired = 2;

            query = new RegexQuery(new Term(FIELD_NAME, "ken.*"));
            searcher = new IndexSearcher(ramDir, true);
            hits = searcher.Search(query, 100);

            var scorer = new QueryScorer(query, FIELD_NAME);
            var highlighter = new Highlighter(this, scorer);

            // Iterate over the returned documents, not TotalHits: ScoreDocs holds at most
            // the 100 requested hits while TotalHits counts every match.
            for (int i = 0; i < hits.ScoreDocs.Length; i++)
            {
                String text = searcher.Doc(hits.ScoreDocs[i].Doc).Get(FIELD_NAME);
                TokenStream tokenStream = analyzer.TokenStream(FIELD_NAME, new StringReader(text));

                highlighter.TextFragmenter = new SimpleFragmenter(40);

                String result = highlighter.GetBestFragments(tokenStream, text, maxNumFragmentsRequired,
                                                             "...");
                Console.WriteLine("\t" + result);
            }

            Assert.IsTrue(numHighlights == 5, "Failed to find correct number of highlights " + numHighlights + " found");
        }
 /// <summary>
 /// Opens a fresh searcher on <c>ramDir</c>, rewrites <paramref name="unReWrittenQuery"/>
 /// against <c>reader</c>, and stores the rewritten query and its hits (up to 1000)
 /// in the <c>query</c> and <c>hits</c> members.
 /// </summary>
 /// <param name="unReWrittenQuery">Query to rewrite and execute.</param>
 public void DoSearching(Query unReWrittenQuery)
 {
     searcher = new IndexSearcher(ramDir, true);

     // Multi-term queries (prefix, wildcard, range, fuzzy, ...) only work
     // once they have been rewritten.
     query = unReWrittenQuery.Rewrite(reader);

     string description = query.ToString(FIELD_NAME);
     Console.WriteLine("Searching for: " + description);

     hits = searcher.Search(query, null, 1000);
 }
        /// <summary>
        /// Searches for "t_text1:random", highlights the "t_text1" field of every returned
        /// document and asserts the exact highlighted fragment.
        /// </summary>
        private void SearchIndex()
        {
            String q = "t_text1:random";
            QueryParser parser = new QueryParser(TEST_VERSION, "t_text1", a);
            Query query = parser.Parse(q);
            IndexSearcher searcher = new IndexSearcher(dir, true);
            try
            {
                // This reader-aware scorer can produce a negative idf, which leads to a
                // null fragment; the reader-less QueryTermScorer(query, "t_text1")
                // constructor does not use idf.
                IScorer scorer = new QueryTermScorer(query, searcher.IndexReader, "t_text1");
                Highlighter h = new Highlighter(scorer);

                TopDocs hits = searcher.Search(query, null, 10);

                // Iterate over the returned documents, not TotalHits: ScoreDocs holds at
                // most the 10 requested hits while TotalHits counts every match.
                for (int i = 0; i < hits.ScoreDocs.Length; i++)
                {
                    Document doc = searcher.Doc(hits.ScoreDocs[i].Doc);
                    String result = h.GetBestFragment(a, "t_text1", doc.Get("t_text1"));
                    Console.WriteLine("result:" + result);

                    // Expected value first, so a failure message reads the right way round.
                    Assert.AreEqual("more <B>random</B> words for second field", result);
                }
            }
            finally
            {
                // Close the searcher even when an assertion above fails.
                searcher.Close();
            }
        }
        /// <summary>
        /// Searches with the multi-term query "JF? or Kenned*" without rewriting it first
        /// and verifies that highlighting then yields zero highlights.
        /// </summary>
        public void TestUnRewrittenQuery()
        {
            var helper = new TestHighlightRunner();
            helper.TestAction = () =>
            {
                numHighlights = 0;

                searcher = new IndexSearcher(ramDir, true);
                Analyzer analyzer = new StandardAnalyzer(TEST_VERSION);

                QueryParser parser = new QueryParser(TEST_VERSION, FIELD_NAME, analyzer);
                Query query = parser.Parse("JF? or Kenned*");
                Console.WriteLine("Searching with primitive query");

                // Deliberately NOT calling query.Rewrite(reader) here: the point of the
                // test is what happens when the rewrite is forgotten.
                TopDocs hits = searcher.Search(query, null, 1000);

                int maxNumFragmentsRequired = 3;

                // Iterate over the returned documents, not TotalHits: ScoreDocs holds at
                // most the 1000 requested hits while TotalHits counts every match.
                for (int i = 0; i < hits.ScoreDocs.Length; i++)
                {
                    String text = searcher.Doc(hits.ScoreDocs[i].Doc).Get(FIELD_NAME);
                    TokenStream tokenStream = analyzer.TokenStream(FIELD_NAME,
                                                                   new StringReader(text));
                    Highlighter highlighter = helper.GetHighlighter(query, FIELD_NAME,
                                                                    tokenStream,
                                                                    this, false);

                    highlighter.TextFragmenter = new SimpleFragmenter(40);

                    String highlightedText = highlighter.GetBestFragments(tokenStream, text,
                                                                          maxNumFragmentsRequired,
                                                                          "...");

                    Console.WriteLine(highlightedText);
                }

                // A multi-term query that was not rewritten must produce no highlights.
                Assert.IsTrue(numHighlights == 0,
                              "Failed to find correct number of highlights " + numHighlights +
                              " found");
            };

            helper.Start();
        }
Beispiel #15
0
        /// <summary>
        /// Indexes one document whose facet dimension "a" is configured to use the index
        /// field "$facets2", then builds a <see cref="TaxonomyFacetSumValueSource"/> from a
        /// match-all search and checks that no dimensions are reported and that asking for
        /// dimension "a" explicitly throws <see cref="ArgumentException"/>.
        /// </summary>
        public virtual void TestWrongIndexFieldName()
        {
            Store.Directory dir     = NewDirectory();
            Store.Directory taxoDir = NewDirectory();

            // Facet ordinals go to their own directory, separate from the main index.
            var taxoWriter = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE);

            FacetsConfig facetsConfig = new FacetsConfig();
            facetsConfig.SetIndexFieldName("a", "$facets2");

            RandomIndexWriter indexWriter = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, dir);

            Document sample = new Document();
            sample.Add(new Int32Field("num", 10, Field.Store.NO));
            sample.Add(new FacetField("a", "foo1"));
            indexWriter.AddDocument(facetsConfig.Build(taxoWriter, sample));

            // Near-real-time open of the main index, then release the writer.
            IndexSearcher searcher = NewSearcher(indexWriter.GetReader());
            indexWriter.Dispose();

            // Near-real-time open of the taxonomy, then release its writer.
            var taxoReader = new DirectoryTaxonomyReader(taxoWriter);
            taxoWriter.Dispose();

            FacetsCollector collector = new FacetsCollector();
            searcher.Search(new MatchAllDocsQuery(), collector);

            TaxonomyFacetSumValueSource facets =
                new TaxonomyFacetSumValueSource(taxoReader, facetsConfig, collector, new Int32FieldSource("num"));

            // Top 10 labels for every dimension that has counts: none are expected.
            IList <FacetResult> allDims = facets.GetAllDims(10);
            Assert.IsTrue(allDims.Count == 0);

            // Asking for dimension "a" directly has to be rejected ...
            try
            {
                facets.GetSpecificValue("a");
                fail("should have hit exc");
            }
            catch (ArgumentException)
            {
                // expected
            }

            // ... and so does asking for its top children.
            try
            {
                facets.GetTopChildren(10, "a");
                fail("should have hit exc");
            }
            catch (ArgumentException)
            {
                // expected
            }

            IOUtils.Dispose(searcher.IndexReader, taxoReader, dir, taxoDir);
        }
Beispiel #16
0
        /// <summary>
        /// Exercises <see cref="RandomSamplingFacetsCollector"/> in three steps:
        /// a query that matches nothing must produce matching-docs entries with zero hits;
        /// a collector whose sample size equals the number of documents must produce the
        /// same facet result as a plain <see cref="FacetsCollector"/>; and a collector
        /// sampling a tenth of the documents must produce amortized counts whose mean and
        /// standard deviation stay within the bounds asserted at the end of the test.
        /// </summary>
        public virtual void TestRandomSampling()
        {
            Directory dir     = NewDirectory();
            Directory taxoDir = NewDirectory();

            DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
            RandomIndexWriter       writer     = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);

            FacetsConfig config = new FacetsConfig();

            int numDocs = AtLeast(10000);

            // Every document gets an "EvenOdd" term ("even"/"odd") and a facet value i % 10
            // under the "iMod10" dimension.
            for (int i = 0; i < numDocs; i++)
            {
                Document doc = new Document();
                doc.Add(new StringField("EvenOdd", (i % 2 == 0) ? "even" : "odd", Store.NO));
                doc.Add(new FacetField("iMod10", Convert.ToString(i % 10)));
                writer.AddDocument(config.Build(taxoWriter, doc));
            }
            Random random = Random();

            // NRT open: both readers are opened from their writers before the writers are disposed.
            IndexSearcher searcher   = NewSearcher(writer.Reader);
            var           taxoReader = new DirectoryTaxonomyReader(taxoWriter);

            IOUtils.Dispose(writer, taxoWriter);

            // Test empty results
            RandomSamplingFacetsCollector collectRandomZeroResults = new RandomSamplingFacetsCollector(numDocs / 10, random.NextLong());

            // Searching for a term that is never indexed must not cause a division by zero.
            searcher.Search(new TermQuery(new Term("EvenOdd", "NeverMatches")), collectRandomZeroResults);

            // There should be no divisions by zero and no null result
            Assert.NotNull(collectRandomZeroResults.GetMatchingDocs());

            // There should be no results at all
            foreach (MatchingDocs doc in collectRandomZeroResults.GetMatchingDocs())
            {
                Assert.AreEqual(0, doc.TotalHits);
            }

            // Now start searching and retrieve results.

            // Use a query to select half of the documents.
            TermQuery query = new TermQuery(new Term("EvenOdd", "even"));

            // There will be 5 facet values (0, 2, 4, 6 and 8), as only the documents with an
            // even i (and therefore an even i % 10) are hits.
            // There is a REALLY small chance that one of the 5 values will be missed when
            // sampling,
            // but that is 0.8 (chance not to take a value) ^ 2000 * 5 (any can be
            // missing) ~ 10^-193,
            // so that is probably not going to happen.
            int maxNumChildren = 5;

            RandomSamplingFacetsCollector random100Percent = new RandomSamplingFacetsCollector(numDocs, random.NextLong());      // no sampling
            RandomSamplingFacetsCollector random10Percent  = new RandomSamplingFacetsCollector(numDocs / 10, random.NextLong()); // 10 % of total docs, 20% of the hits

            FacetsCollector fc = new FacetsCollector();

            // Run the query once and feed all three collectors from the same search.
            searcher.Search(query, MultiCollector.Wrap(fc, random100Percent, random10Percent));

            FastTaxonomyFacetCounts random10FacetCounts  = new FastTaxonomyFacetCounts(taxoReader, config, random10Percent);
            FastTaxonomyFacetCounts random100FacetCounts = new FastTaxonomyFacetCounts(taxoReader, config, random100Percent);
            FastTaxonomyFacetCounts exactFacetCounts     = new FastTaxonomyFacetCounts(taxoReader, config, fc);

            // Only the 10% sample is passed through AmortizeFacetCounts; the other two results are used as counted.
            FacetResult random10Result  = random10Percent.AmortizeFacetCounts(random10FacetCounts.GetTopChildren(10, "iMod10"), config, searcher);
            FacetResult random100Result = random100FacetCounts.GetTopChildren(10, "iMod10");
            FacetResult exactResult     = exactFacetCounts.GetTopChildren(10, "iMod10");

            // A sample size equal to numDocs must give the same result as the plain collector.
            Assert.AreEqual(random100Result, exactResult);

            // We should have five children, but there is a small chance we have fewer
            // (see above).
            Assert.True(random10Result.ChildCount <= maxNumChildren);
            // there should be one child at least.
            Assert.True(random10Result.ChildCount >= 1);

            // Now calculate some statistics to determine if the sampled result is 'ok'.
            // Because random sampling is used, the results will vary each time.
            int sum = 0;

            foreach (LabelAndValue lav in random10Result.LabelValues)
            {
                sum += (int)lav.Value;
            }
            // Note: the mean divides by maxNumChildren (5), not by the number of children actually returned.
            float mu = (float)sum / (float)maxNumChildren;

            float variance = 0;

            foreach (LabelAndValue lav in random10Result.LabelValues)
            {
                variance += (float)Math.Pow((mu - (int)lav.Value), 2);
            }
            variance = variance / maxNumChildren;
            float sigma = (float)Math.Sqrt(variance);

            // We query only half the documents and have 5 categories. The average
            // number of docs in a category will thus be the total divided by 5*2.
            float targetMu = numDocs / (5.0f * 2.0f);

            // The average should be within three standard deviations of the target and the
            // standard deviation should not be too great.
            Assert.True(sigma < 200);
            Assert.True(targetMu - 3 * sigma < mu && mu < targetMu + 3 * sigma);

            IOUtils.Dispose(searcher.IndexReader, taxoReader, dir, taxoDir);
        }
        /// <summary>
        /// Highlights the hits of a wildcard query ("ken*") that uses the constant-score
        /// filter rewrite, passing the query to <see cref="QueryScorer"/> un-rewritten.
        /// The highlighting pass is repeated three times with differently constructed
        /// scorers (explicit field, null field, and a scorer with a default field), and
        /// each pass must leave <c>numHighlights</c> at 5.
        /// </summary>
        /// <remarks>
        /// <c>numHighlights</c> is not modified in this method other than being reset;
        /// presumably it is incremented by this test class acting as the highlighter's
        /// formatter - confirm against the formatter implementation.
        /// </remarks>
        public void TestConstantScoreMultiTermQuery()
        {
            const int maxNumFragmentsRequired = 2;
            const String fragmentSeparator = "...";

            numHighlights = 0;

            query = new WildcardQuery(new Term(FIELD_NAME, "ken*"));
            ((WildcardQuery) query).RewriteMethod = MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE;
            searcher = new IndexSearcher(ramDir, true);
            // can't rewrite ConstantScore if you want to highlight it -
            // it rewrites to ConstantScoreQuery which cannot be highlighted
            // query = unReWrittenQuery.Rewrite(reader);
            Console.WriteLine("Searching for: " + query.ToString(FIELD_NAME));
            hits = searcher.Search(query, null, 1000);

            // Iterate over the returned ScoreDocs instead of counting up to TotalHits:
            // TotalHits is the number of matches, while ScoreDocs holds at most the 1000
            // requested entries, so indexing by TotalHits could run past the array.
            foreach (ScoreDoc hit in hits.ScoreDocs)
            {
                String text = searcher.Doc(hit.Doc).Get(HighlighterTest.FIELD_NAME);
                TokenStream tokenStream = analyzer.TokenStream(FIELD_NAME, new StringReader(text));
                QueryScorer scorer = new QueryScorer(query, FIELD_NAME);

                Highlighter highlighter = new Highlighter(this, scorer);
                highlighter.TextFragmenter = new SimpleFragmenter(20);

                String result = highlighter.GetBestFragments(tokenStream, text, maxNumFragmentsRequired,
                                                             fragmentSeparator);
                Console.WriteLine("\t" + result);
            }
            Assert.IsTrue(numHighlights == 5, "Failed to find correct number of highlights " + numHighlights + " found");

            // try null field

            hits = searcher.Search(query, null, 1000);

            numHighlights = 0;

            foreach (ScoreDoc hit in hits.ScoreDocs)
            {
                String text = searcher.Doc(hit.Doc).Get(HighlighterTest.FIELD_NAME);
                TokenStream tokenStream = analyzer.TokenStream(HighlighterTest.FIELD_NAME, new StringReader(text));
                QueryScorer scorer = new QueryScorer(query, null);

                Highlighter highlighter = new Highlighter(this, scorer);
                highlighter.TextFragmenter = new SimpleFragmenter(20);

                String result = highlighter.GetBestFragments(tokenStream, text, maxNumFragmentsRequired,
                                                             fragmentSeparator);
                Console.WriteLine("\t" + result);
            }
            Assert.IsTrue(numHighlights == 5, "Failed to find correct number of highlights " + numHighlights + " found");

            // try default field

            hits = searcher.Search(query, null, 1000);

            numHighlights = 0;

            foreach (ScoreDoc hit in hits.ScoreDocs)
            {
                String text = searcher.Doc(hit.Doc).Get(HighlighterTest.FIELD_NAME);
                TokenStream tokenStream = analyzer.TokenStream(HighlighterTest.FIELD_NAME, new StringReader(text));
                QueryScorer scorer = new QueryScorer(query, "random_field", HighlighterTest.FIELD_NAME);

                Highlighter highlighter = new Highlighter(this, scorer);
                highlighter.TextFragmenter = new SimpleFragmenter(20);

                String result = highlighter.GetBestFragments(tokenStream, text, maxNumFragmentsRequired,
                                                             fragmentSeparator);
                Console.WriteLine("\t" + result);
            }
            Assert.IsTrue(numHighlights == 5, "Failed to find correct number of highlights " + numHighlights + " found");
        }
        /// <summary>
        /// Builds a random index, collects five high and five low document-frequency terms of
        /// the "body" field, and checks that a <see cref="CommonTermsQuery"/> over all ten
        /// terms returns exactly the same documents as a <see cref="BooleanQuery"/> built
        /// from the low-frequency terms only. Afterwards the index is force-merged to one
        /// segment and the query is run through <c>QueryUtils.Check</c>.
        /// </summary>
        public void TestRandomIndex()
        {
            Directory    dir      = NewDirectory();
            MockAnalyzer analyzer = new MockAnalyzer(Random);

            analyzer.MaxTokenLength = TestUtil.NextInt32(Random, 1, IndexWriter.MAX_TERM_LENGTH);
            RandomIndexWriter w = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, dir, analyzer);

            CreateRandomIndex(AtLeast(50), w, Random.NextInt64());
            DirectoryReader reader       = w.GetReader();
            AtomicReader    wrapper      = SlowCompositeReaderWrapper.Wrap(reader);
            string          field        = @"body";
            Terms           terms        = wrapper.GetTerms(field);
            // Two bounded queues of 5 entries each; their ordering is defined by the anonymous
            // queue classes, which are declared outside this method.
            var             lowFreqQueue = new AnonymousPriorityQueue(this, 5);

            Util.PriorityQueue <TermAndFreq> highFreqQueue = new AnonymousPriorityQueue1(this, 5);
            try
            {
                TermsEnum iterator = terms.GetIterator(null);
                while (iterator.Next() != null)
                {
                    if (highFreqQueue.Count < 5)
                    {
                        // The first five terms seed both queues.
                        highFreqQueue.Add(new TermAndFreq(BytesRef.DeepCopyOf(iterator.Term), iterator.DocFreq));
                        lowFreqQueue.Add(new TermAndFreq(BytesRef.DeepCopyOf(iterator.Term), iterator.DocFreq));
                    }
                    else
                    {
                        // Overwrite the top entry of the high-frequency queue when the current
                        // term occurs in more documents, then let the queue re-order itself.
                        if (highFreqQueue.Top.freq < iterator.DocFreq)
                        {
                            highFreqQueue.Top.freq = iterator.DocFreq;
                            highFreqQueue.Top.term = BytesRef.DeepCopyOf(iterator.Term);
                            highFreqQueue.UpdateTop();
                        }

                        // Overwrite the top entry of the low-frequency queue when the current
                        // term occurs in fewer documents.
                        if (lowFreqQueue.Top.freq > iterator.DocFreq)
                        {
                            lowFreqQueue.Top.freq = iterator.DocFreq;
                            lowFreqQueue.Top.term = BytesRef.DeepCopyOf(iterator.Term);
                            lowFreqQueue.UpdateTop();
                        }
                    }
                }

                int lowFreq  = lowFreqQueue.Top.freq;
                int highFreq = highFreqQueue.Top.freq;
                // Skip the test unless the two frequency groups are clearly separated.
                AssumeTrue(@"unlucky index", highFreq - 1 > lowFreq);
                List <TermAndFreq> highTerms  = QueueToList(highFreqQueue);
                List <TermAndFreq> lowTerms   = QueueToList(lowFreqQueue);
                IndexSearcher      searcher   = NewSearcher(reader);
                Occur            lowFreqOccur = RandomOccur(Random);
                BooleanQuery     verifyQuery  = new BooleanQuery();
                // highFreq - 1 is passed as the third constructor argument - presumably the
                // frequency cutoff above which a term counts as "common"; confirm against the
                // CommonTermsQuery constructor documentation.
                CommonTermsQuery cq           = new CommonTermsQuery(RandomOccur(Random), lowFreqOccur, highFreq - 1, Random.NextBoolean());
                // Low-frequency terms go into both the query under test and the reference query.
                foreach (TermAndFreq termAndFreq in lowTerms)
                {
                    cq.Add(new Term(field, termAndFreq.term));
                    verifyQuery.Add(new BooleanClause(new TermQuery(new Term(field, termAndFreq.term)), lowFreqOccur));
                }

                // High-frequency terms are added to the query under test only.
                foreach (TermAndFreq termAndFreq in highTerms)
                {
                    cq.Add(new Term(field, termAndFreq.term));
                }

                TopDocs cqSearch     = searcher.Search(cq, reader.MaxDoc);
                TopDocs verifySearch = searcher.Search(verifyQuery, reader.MaxDoc);
                assertEquals(verifySearch.TotalHits, cqSearch.TotalHits);
                // Compare the two result sets as sets of document ids: every id returned by the
                // query under test must be in the reference set, and none may be left over.
                var hits = new JCG.HashSet <int>();
                foreach (ScoreDoc doc in verifySearch.ScoreDocs)
                {
                    hits.Add(doc.Doc);
                }

                foreach (ScoreDoc doc in cqSearch.ScoreDocs)
                {
                    assertTrue(hits.Remove(doc.Doc));
                }

                assertTrue(hits.Count == 0);
                // Merge down to a single segment and run the generic query checks on a fresh reader.
                w.ForceMerge(1);
                DirectoryReader reader2 = w.GetReader();
                QueryUtils.Check(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                    this,
#endif
                    Random, cq, NewSearcher(reader2));
                reader2.Dispose();
            }
            finally
            {
                // Release the readers, the writer and the directory even when an assertion fails.
                reader.Dispose();
                wrapper.Dispose();
                w.Dispose();
                dir.Dispose();
            }
        }
        /// <summary>
        /// Runs the phrase query "very long" against the test index and prints the best
        /// highlighted fragment of every returned hit, using a <see cref="QueryScorer"/>
        /// together with a <see cref="SimpleSpanFragmenter"/>. The test makes no assertions;
        /// it only has to complete without throwing.
        /// </summary>
        public void TestQueryScorerHits()
        {
            Analyzer analyzer = new SimpleAnalyzer();
            query = new QueryParser(TEST_VERSION, FIELD_NAME, analyzer).Parse("\"very long\"");
            searcher = new IndexSearcher(ramDir, true);
            TopDocs topDocs = searcher.Search(query, 10);

            QueryScorer queryScorer = new QueryScorer(query, FIELD_NAME);
            Highlighter highlighter = new Highlighter(queryScorer);

            foreach (ScoreDoc hit in topDocs.ScoreDocs)
            {
                Document hitDoc = searcher.Doc(hit.Doc);
                String fieldText = hitDoc.Get(FIELD_NAME);

                TokenStream tokens = TokenSources.GetAnyTokenStream(searcher.IndexReader, hit.Doc,
                                                                    FIELD_NAME, hitDoc, analyzer);

                // A new span fragmenter is installed for every hit.
                highlighter.TextFragmenter = new SimpleSpanFragmenter(queryScorer);

                Console.WriteLine(highlighter.GetBestFragment(tokens, fieldText));
            }
        }
 // Runs the query once unsorted (result discarded) and once sorted by the "title"
 // string field, and returns the total hit count of the sorted search. When the
 // default codec supports doc values, the sorted search is repeated with a second,
 // identically built sort and both hit counts are asserted to be equal.
 private int RunQuery(IndexSearcher s, Query q)
 {
     s.Search(q, 10);

     Sort titleSort = new Sort(new SortField("title", SortField.Type_e.STRING));
     int sortedHitCount = s.Search(q, null, 10, titleSort).TotalHits;

     if (!DefaultCodecSupportsDocValues())
     {
         return sortedHitCount;
     }

     Sort dvSort = new Sort(new SortField("title", SortField.Type_e.STRING));
     int dvHitCount = s.Search(q, null, 10, dvSort).TotalHits;
     Assert.AreEqual(sortedHitCount, dvHitCount);

     return sortedHitCount;
 }
Beispiel #21
0
        /// <summary>
        /// Searches the index for the query text, publishes the ranked results (with a
        /// highlighted excerpt) to <c>SearchedListViewModel</c>, updates the status bar
        /// counters and logs the ranking through <c>trecLogger</c>.
        /// </summary>
        /// <param name="querytext">
        /// The text to search the index for. It is lower-cased (culture-invariant) before it
        /// is handed to the query parser, so upper-case query-syntax keywords are lower-cased too.
        /// </param>
        /// <param name="qid">Query id that is passed to the TREC logger together with the ranked items.</param>
        /// <param name="relevantList">
        /// Ids of the passages known to be relevant for this query. Used to flag each result
        /// and to compute the "Num Relevants" status text. Must not be null.
        /// </param>
        /// <returns>The string form of the parsed query.</returns>
        private string SearchAndDisplayResults(string querytext, long qid, List <long> relevantList)
        {
            System.Console.WriteLine("Searching for " + querytext);
            // Invariant lower-casing: the current-culture variant maps 'I' to a dotless i under
            // Turkish cultures, which would silently change the query terms.
            querytext = querytext.ToLowerInvariant();
            Query query = parser.Parse(querytext);

            System.Console.WriteLine($"Searching for {query}");

            TopDocs results = searcher.Search(query, MAX_QUERY);

            // Highlighter that wraps matches in a bold span with a yellow background.
            IFormatter formatter = new SimpleHTMLFormatter("<span style=\"font-weight:bold;background-color:yellow;\">", "</span>");

            // Fragment size of 3000 characters.
            var fragmenter  = new SimpleFragmenter(3000);
            var scorer      = new QueryScorer(query);
            var highlighter = new Highlighter(formatter, scorer)
            {
                TextFragmenter = fragmenter
            };

            // Set for O(1) membership tests; the original list is still used for the total count below.
            var relevantIds = new HashSet <long>(relevantList);

            long            rank           = 0;
            float           topscore       = 0f;
            long            foundrelevants = 0;
            List <TrecItem> logItems       = new List <TrecItem>();

            SearchedListViewModel.DeleteAll();
            foreach (ScoreDoc scoreDoc in results.ScoreDocs)
            {
                // The first hit carries the score that all scores are normalized against.
                if (rank == 0)
                {
                    topscore = scoreDoc.Score;
                }
                rank++;
                Lucene.Net.Documents.Document doc = searcher.Doc(scoreDoc.Doc);
                long id = Convert.ToInt64(doc.Get(PID_FN).ToString());
                CollectionPassage ps = collectionProvider.Passages[id];

                // Logging Trec
                logItems.Add(new TrecItem(0, id, rank, scoreDoc.Score));

                // Get the highlighted fragment; fall back to the raw passage text when the
                // highlighter returns nothing.
                TokenStream stream      = analyzer.TokenStream("", new StringReader(ps.passage_text));
                string      highlighted = highlighter.GetBestFragment(stream, ps.passage_text) ?? ps.passage_text;

                bool isRelevant = relevantIds.Contains(id);
                if (isRelevant)
                {
                    foundrelevants++;
                }

                // Avoid 0/0 (NaN) when the best hit has a score of zero.
                float normalizedScore = topscore != 0f ? scoreDoc.Score / topscore : 0f;
                SearchedListViewModel.Add(normalizedScore, id, ps.GetTitle(), ps.url, highlighted, isRelevant);
            }

            StatusBarViewModel.Instance.NumRelevants = "Num Relevants : " + foundrelevants.ToString() + "/" + relevantList.Count.ToString();
            StatusBarViewModel.Instance.NumSearch    = "Num Searched :" + results.ScoreDocs.Length.ToString();
            // Logging Trec
            trecLogger.Logging(qid, logItems);

            return(query.ToString());
        }
Beispiel #22
0
 // Verifies that the documents returned by the sorted search match the expected list:
 // the values of the "tracer" fields of every hit, concatenated in result order,
 // must equal expectedResult.
 // Copied from TestSort.java
 private void AssertMatches(IndexSearcher searcher, Query query, Sort sort, string expectedResult)
 {
     StringBuilder tracerValues = new StringBuilder(10);

     foreach (ScoreDoc hit in searcher.Search(query, null, 1000, sort).ScoreDocs)
     {
         Document hitDoc = searcher.Doc(hit.Doc);
         foreach (IndexableField tracer in hitDoc.GetFields("tracer"))
         {
             tracerValues.Append(tracer.StringValue);
         }
     }

     Assert.AreEqual(expectedResult, tracerValues.ToString());
 }
Beispiel #23
0
        /// <summary>
        /// Suggests words similar to <paramref name="word"/>, optionally restricted to the
        /// terms of one field of a user index.
        /// </summary>
        /// <remarks>
        /// The spell-check index is queried with the n-grams of the word; each candidate is
        /// then filtered by string distance (<c>sd.GetDistance</c>) against a minimum score
        /// and, in restricted mode, by its document frequency in the user index.
        /// "Restricted mode" means that both <paramref name="ir"/> and
        /// <paramref name="field"/> are non-null.
        /// </remarks>
        /// <param name="word">The word to find suggestions for.</param>
        /// <param name="numSug">The maximum number of suggestions to return.</param>
        /// <param name="ir">The index reader of the user index; may be null (see <paramref name="field"/>).</param>
        /// <param name="field">
        /// The field of the user index. When this and <paramref name="ir"/> are both non-null,
        /// only words that occur in this field are suggested.
        /// </param>
        /// <param name="morePopular">
        /// When true, only words at least as frequent as the searched word are suggested
        /// (restricted mode only). When false and the word itself occurs in the field, the
        /// word is returned as the only suggestion.
        /// </param>
        /// <exception cref="System.IO.IOException">
        /// Documented by the original API; presumably raised by the underlying index access - confirm.
        /// </exception>
        /// <returns>
        /// The suggested words, at most <paramref name="numSug"/> of them. The array is filled
        /// back to front in the order the suggestion queue pops its entries; per the original
        /// documentation the result is sorted first by edit distance and second (restricted
        /// mode only) by popularity in the user index - the exact ordering is defined by
        /// <c>SuggestWordQueue</c>, which is not visible here.
        /// </returns>
        public virtual System.String[] SuggestSimilar(System.String word, int numSug, IndexReader ir, System.String field, bool morePopular)
        {    // ObtainSearcher also verifies that the spell checker is still open (per the original note)
            IndexSearcher indexSearcher = ObtainSearcher();

            try
            {
                float min        = this.minScore;
                int   lengthWord = word.Length;

                // Document frequency of the word itself; only available in restricted mode.
                int freq     = (ir != null && field != null) ? ir.DocFreq(new Term(field, word)) : 0;
                // Minimum frequency a suggestion must reach when morePopular is requested.
                int goalFreq = (morePopular && ir != null && field != null) ? freq : 0;
                // If the word exists in the user index and we do not care about word frequency, return the word itself.
                if (!morePopular && freq > 0)
                {
                    return(new String[] { word });
                }

                var      query = new BooleanQuery();
                String[] grams;
                String   key;

                var alreadySeen = new HashSet <string>();
                // Add one group of clauses per n-gram size allowed for a word of this length.
                for (var ng = GetMin(lengthWord); ng <= GetMax(lengthWord); ng++)
                {
                    key = "gram" + ng;           // name of the index field holding n-grams of this size

                    grams = FormGrams(word, ng); // split the word into n-grams (duplicates are kept)

                    if (grams.Length == 0)
                    {
                        continue; // no n-grams of this size for the word
                    }

                    if (bStart > 0)
                    {                                               // boost candidates sharing the word's prefix?
                        Add(query, "start" + ng, grams[0], bStart); // matches the start of the word
                    }
                    if (bEnd > 0)
                    {                                                          // boost candidates sharing the word's suffix?
                        Add(query, "end" + ng, grams[grams.Length - 1], bEnd); // matches the end of the word
                    }
                    for (int i = 0; i < grams.Length; i++)
                    {
                        Add(query, key, grams[i]);
                    }
                }

                // Fetch ten times more candidates than requested, because the filters below discard some.
                int maxHits = 10 * numSug;

                //    System.out.println("Q: " + query);
                ScoreDoc[] hits = indexSearcher.Search(query, null, maxHits).ScoreDocs;
                //    System.out.println("HITS: " + hits.length());
                SuggestWordQueue sugQueue = new SuggestWordQueue(numSug);

                // Go through more than numSug matches in case the distance filter rejects some.
                int         stop    = Math.Min(hits.Length, maxHits);
                SuggestWord sugWord = new SuggestWord();
                for (int i = 0; i < stop; i++)
                {
                    sugWord.termString = indexSearcher.Doc(hits[i].Doc).Get(F_WORD); // the original word stored for this hit

                    // Do not suggest the word itself.
                    if (sugWord.termString.Equals(word))
                    {
                        continue;
                    }

                    // Score the candidate with the configured string distance and drop it when below the current minimum.
                    sugWord.score = sd.GetDistance(word, sugWord.termString);
                    if (sugWord.score < min)
                    {
                        continue;
                    }

                    if (ir != null && field != null)
                    {                                                                   // restricted mode: consult the user index
                        sugWord.freq = ir.DocFreq(new Term(field, sugWord.termString)); // frequency of the candidate in the user index
                        // Do not suggest a word that is absent from the field or, when morePopular is set, rarer than the searched word.
                        if ((morePopular && goalFreq > sugWord.freq) || sugWord.freq < 1)
                        {
                            continue;
                        }
                    }

                    if (alreadySeen.Add(sugWord.termString) == false)                     // this word was already accepted; do not return it twice
                    {
                        continue;
                    }

                    sugQueue.InsertWithOverflow(sugWord);
                    if (sugQueue.Size() == numSug)
                    {
                        // Once the queue is full, raise the minimum to the score of the queue's top entry.
                        min = ((SuggestWord)sugQueue.Top()).score;
                    }
                    // The instance now belongs to the queue; use a fresh one for the next candidate.
                    sugWord = new SuggestWord();
                }

                // Convert the queue into a string array, filling it from the last slot to the first.
                String[] list = new String[sugQueue.Size()];
                for (int i = sugQueue.Size() - 1; i >= 0; i--)
                {
                    list[i] = ((SuggestWord)sugQueue.Pop()).termString;
                }

                return(list);
            }
            finally
            {
                // Always hand the searcher back, also on the early return and on exceptions.
                ReleaseSearcher(indexSearcher);
            }
        }
        /// <summary>
        /// Searches the Lucene index located at <c>Dic</c> for documents whose "Loai" field
        /// matches <paramref name="loai"/> and, when <paramref name="q"/> is non-empty, whose
        /// "SearchContent" field matches <paramref name="q"/>, and returns one page of results.
        /// </summary>
        /// <param name="q">Query text parsed against "SearchContent"; ignored when null or empty.</param>
        /// <param name="loai">Query text parsed against "Loai"; always required to match.</param>
        /// <param name="from">Number of leading hits to skip.</param>
        /// <param name="size">Maximum number of hits to return.</param>
        /// <param name="total">Receives the number of matching hits before paging is applied.</param>
        /// <returns>The requested page of hits, mapped to <c>Obj</c> instances.</returns>
        public static List <Obj> SearchByLoai(string q, string loai, int from, int size, out int total)
        {
            FSDirectory   directory   = null;
            IndexReader   indexReader = null;
            IndexSearcher indexSearch = null;

            try
            {
                directory   = FSDirectory.Open(new DirectoryInfo(Dic));
                indexReader = IndexReader.Open(directory, true);
                indexSearch = new IndexSearcher(indexReader);

                var analyzer  = new StandardAnalyzer(Version.LUCENE_29);
                var mainQuery = new BooleanQuery();

                if (!string.IsNullOrEmpty(q))
                {
                    var queryParser = new QueryParser(Version.LUCENE_29, "SearchContent", analyzer);
                    mainQuery.Add(queryParser.Parse(q), BooleanClause.Occur.MUST);
                }

                var queryParserLoai = new QueryParser(Version.LUCENE_29, "Loai", analyzer);
                mainQuery.Add(queryParserLoai.Parse(loai), BooleanClause.Occur.MUST);

                // An empty index reports MaxDoc() == 0, which is not a valid number of hits to
                // request, so always ask for at least one.
                var resultDocs = indexSearch.Search(mainQuery, Math.Max(1, indexReader.MaxDoc()));
                var hits       = resultDocs.scoreDocs;

                total = hits.Length;

                // Page first, then load: only the documents that are actually returned are
                // fetched from the index and converted.
                return(hits
                       .Skip(from)
                       .Take(size)
                       .Select(hit => indexSearch.Doc(hit.doc))
                       .Select(document => new Obj()
                {
                    Kieu    = document.Get("Loai"),
                    RowId   = new Guid(document.Get("RowId")),
                    Id      = new Guid(document.Get("ID")),
                    Url     = document.Get("Url"),
                    NoiDung = document.Get("NoiDung"),
                    Ten     = document.Get("Ten")
                })
                       .ToList());
            }
            finally
            {
                // Release everything that was opened, in reverse order, even when parsing or
                // searching throws. The searcher was built from an existing reader, so the
                // reader is closed explicitly here.
                if (indexSearch != null)
                {
                    indexSearch.Close();
                }
                if (indexReader != null)
                {
                    indexReader.Close();
                }
                if (directory != null)
                {
                    directory.Close();
                }
            }
        }
Beispiel #25
0
        /// <summary>
        /// Searches the index at <c>indexPath</c> for <paramref name="searchTerm"/> in the
        /// given <paramref name="fields"/> and returns the matching articles, read from
        /// <c>Data\cacm.all</c>, with their scores, ordered by descending score.
        /// </summary>
        /// <param name="searchTerm">The query text; a null or empty value yields an empty result.</param>
        /// <param name="fields">The index fields to search; a null or empty array yields an empty result.</param>
        /// <returns>
        /// The articles whose ids were returned by the search (at most 3200 hits), each with
        /// its <c>Score</c> set to the hit score rounded to four decimals.
        /// </returns>
        /// <exception cref="NullReferenceException">
        /// The index directory does not exist. NOTE(review): kept for compatibility with
        /// existing callers; <c>DirectoryNotFoundException</c> would be the conventional type.
        /// </exception>
        public IEnumerable <Article> Search(string searchTerm, string[] fields)
        {
            // Check that the index exists; if not, report it.
            if (!System.IO.Directory.Exists(indexPath))
            {
                throw new NullReferenceException("Index Does Not Exist");
            }

            List <Article> CompleteResults = new List <Article>();

            // Nothing to search for: return before any index resource is opened.
            if (string.IsNullOrEmpty(searchTerm) || fields == null || fields.Length == 0)
            {
                return(CompleteResults);
            }

            luceneIndexDirectory = FSDirectory.Open(indexPath);
            try
            {
                using (IndexSearcher searcher = new IndexSearcher(luceneIndexDirectory))
                {
                    // Parser that searches all requested fields.
                    MultiFieldQueryParser allFieldsSearcher =
                        new MultiFieldQueryParser(LuceneVersion, fields, analyzer);

                    // Parse the search term.
                    Query query = allFieldsSearcher.Parse(searchTerm);
                    // Collector that keeps the 3200 best-scoring hits.
                    TopScoreDocCollector topScoreDocCollector = TopScoreDocCollector.Create(3200, true);
                    // Run the search.
                    searcher.Search(query, topScoreDocCollector);
                    ScoreDoc[] hits = topScoreDocCollector.TopDocs().ScoreDocs;

                    // Read the article file once and index it by id, instead of enumerating
                    // the whole source again for every hit.
                    var articlesById = ArticleReader.ReadArticles(@"Data\cacm.all").ToLookup(article => article.Id);

                    // Map every hit to its full article and attach the hit score.
                    foreach (ScoreDoc hit in hits)
                    {
                        Document document = searcher.Doc(hit.Doc);

                        double score     = Convert.ToDouble(hit.Score.ToString("0.0000"));
                        int    articleId = Convert.ToInt32(document.Get("ID"));

                        // Only the first article with a matching id is used; hits without a
                        // matching article are dropped.
                        foreach (Article article in articlesById[articleId])
                        {
                            Article art = article;
                            art.Score = score;
                            CompleteResults.Add(art);
                            break;
                        }
                    }
                }
            }
            finally
            {
                // Release the directory even when parsing or searching throws.
                luceneIndexDirectory.Dispose();
            }

            // Return the results to the caller, best score first.
            return(CompleteResults.OrderByDescending(x => x.Score));
        }
        /// <summary>
        /// Opens the index stored in <paramref name="dir"/> and checks its stored fields, term
        /// vectors, doc values and several term queries against the values asserted below.
        /// </summary>
        /// <param name="dir">Directory that holds the index to verify.</param>
        /// <param name="oldName">Index name; only used to label assertion failures.</param>
        public virtual void SearchIndex(Directory dir, string oldName)
        {
            const int    docLimit     = 35;
            const int    expectedHits = 34;
            const string utf8Text     = "Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd";

            IndexReader   reader   = DirectoryReader.Open(dir);
            IndexSearcher searcher = NewSearcher(reader);

            TestUtil.CheckIndex(dir);

            // The presence of these fields tells us which generation wrote the index:
            // "content5" => 4.0+, "dvSortedSet" => 4.2+.
            bool is40Index = MultiFields.GetMergedFieldInfos(reader).FieldInfo("content5") != null;
            bool is42Index = MultiFields.GetMergedFieldInfos(reader).FieldInfo("dvSortedSet") != null;

            Debug.Assert(is40Index); // NOTE: currently we can only do this on trunk!

            IBits liveDocs = MultiFields.GetLiveDocs(reader);

            // ---- stored fields and term vectors -------------------------------------------
            for (int docId = 0; docId < docLimit; docId++)
            {
                if (!liveDocs.Get(docId))
                {
                    // The only deleted document is expected to be #7.
                    Assert.AreEqual(7, docId);
                    continue;
                }

                Document stored = reader.Document(docId);
                IList <IIndexableField> storedFields = stored.Fields;

                // Documents without "content3" carry the full set of checked fields.
                if (stored.GetField("content3") == null)
                {
                    Assert.AreEqual(is40Index ? 7 : 5, storedFields.Count);

                    IIndexableField field = stored.GetField("id");
                    Assert.AreEqual("" + docId, field.GetStringValue());

                    field = stored.GetField("utf8");
                    Assert.AreEqual(utf8Text, field.GetStringValue());

                    field = stored.GetField("autf8");
                    Assert.AreEqual(utf8Text, field.GetStringValue());

                    field = stored.GetField("content2");
                    Assert.AreEqual("here is more content with aaa aaa aaa", field.GetStringValue());

                    field = stored.GetField("fie\u2C77ld");
                    Assert.AreEqual("field with non-ascii name", field.GetStringValue());
                }

                Fields termVectors = reader.GetTermVectors(docId);
                Assert.IsNotNull(termVectors, "i=" + docId);
                Assert.IsNotNull(termVectors.GetTerms("utf8"), "docID=" + docId + " index=" + oldName);
            }

            // ---- doc values ---------------------------------------------------------------
            if (is40Index)
            {
                NumericDocValues byteValues = MultiDocValues.GetNumericValues(reader, "dvByte");

                // Every binary/sorted field is expected to hold the same 4-byte encoding of the id.
                BinaryDocValues[] binarySources =
                {
                    MultiDocValues.GetBinaryValues(reader, "dvBytesDerefFixed"),
                    MultiDocValues.GetBinaryValues(reader, "dvBytesDerefVar"),
                    MultiDocValues.GetSortedValues(reader, "dvBytesSortedFixed"),
                    MultiDocValues.GetSortedValues(reader, "dvBytesSortedVar"),
                    MultiDocValues.GetBinaryValues(reader, "dvBytesStraightFixed"),
                    MultiDocValues.GetBinaryValues(reader, "dvBytesStraightVar")
                };

                NumericDocValues doubleValues = MultiDocValues.GetNumericValues(reader, "dvDouble");
                NumericDocValues floatValues  = MultiDocValues.GetNumericValues(reader, "dvFloat");

                // Integral fields that must simply equal the id.
                NumericDocValues[] integralSources =
                {
                    MultiDocValues.GetNumericValues(reader, "dvInt"),
                    MultiDocValues.GetNumericValues(reader, "dvLong"),
                    MultiDocValues.GetNumericValues(reader, "dvPacked"),
                    MultiDocValues.GetNumericValues(reader, "dvShort")
                };

                SortedSetDocValues sortedSetValues = is42Index
                    ? MultiDocValues.GetSortedSetValues(reader, "dvSortedSet")
                    : null;

                for (int docId = 0; docId < docLimit; docId++)
                {
                    int id = Convert.ToInt32(reader.Document(docId).Get("id"));
                    Assert.AreEqual(id, byteValues.Get(docId));

                    // id split into four bytes, most significant byte first
                    sbyte[] idBytes =
                    {
                        (sbyte)((int)((uint)id >> 24)),
                        (sbyte)((int)((uint)id >> 16)),
                        (sbyte)((int)((uint)id >> 8)),
                        (sbyte)id
                    };
                    BytesRef expected = new BytesRef((byte[])(Array)idBytes);
                    BytesRef scratch  = new BytesRef();

                    foreach (BinaryDocValues source in binarySources)
                    {
                        source.Get(docId, scratch);
                        Assert.AreEqual(expected, scratch);
                    }

                    Assert.AreEqual((double)id, BitConverter.Int64BitsToDouble(doubleValues.Get(docId)), 0D);
                    Assert.AreEqual((float)id, Number.Int32BitsToSingle((int)floatValues.Get(docId)), 0F);

                    foreach (NumericDocValues source in integralSources)
                    {
                        Assert.AreEqual(id, source.Get(docId));
                    }

                    if (is42Index)
                    {
                        // Exactly one ord per document, resolving to the same bytes.
                        sortedSetValues.SetDocument(docId);
                        long onlyOrd = sortedSetValues.NextOrd();
                        Assert.AreEqual(SortedSetDocValues.NO_MORE_ORDS, sortedSetValues.NextOrd());
                        sortedSetValues.LookupOrd(onlyOrd, scratch);
                        Assert.AreEqual(expected, scratch);
                    }
                }
            }

            // ---- term queries -------------------------------------------------------------
            Func <string, string, ScoreDoc[]> termHits =
                (field, text) => searcher.Search(new TermQuery(new Term(field, text)), null, 1000).ScoreDocs;

            ScoreDoc[] hits = termHits("content", "aaa");

            // The top hit must be document #0.
            Document first = searcher.IndexReader.Document(hits[0].Doc);

            assertEquals("didn't get the right document first", "0", first.Get("id"));

            DoTestHits(hits, expectedHits, searcher.IndexReader);

            if (is40Index)
            {
                foreach (string field in new string[] { "content5", "content6" })
                {
                    DoTestHits(termHits(field, "aaa"), expectedHits, searcher.IndexReader);
                }
            }

            foreach (string text in new string[] { "\u0000", "lu\uD834\uDD1Ece\uD834\uDD60ne", "ab\ud917\udc17cd" })
            {
                Assert.AreEqual(expectedHits, termHits("utf8", text).Length);
            }

            reader.Dispose();
        }
Beispiel #27
0
        /// <summary>
        /// Two-stage "leader/follower" search. The best-scoring document of the leader index
        /// is taken as the leader; the follower index is then searched with the same term and
        /// only followers whose "Leader" field equals the leader's ID are kept. Every kept hit
        /// is finally matched against the articles read from <c>Data\cacm.all</c>.
        /// </summary>
        /// <param name="searchTerm">Query text passed to the multi-field query parser.</param>
        /// <param name="fields">Names of the index fields to search in.</param>
        /// <returns>
        /// The matching articles carrying their search score, leader first; an empty list when
        /// there is nothing to search for or the leader index has no match.
        /// </returns>
        /// <exception cref="NullReferenceException">
        /// The leader or follower index directory does not exist.
        /// </exception>
        public IEnumerable <Article> AdvancedSearch(string searchTerm, string[] fields)
        {
            // The leader index must exist on disk.
            // NOTE(review): NullReferenceException is an odd choice for "directory missing";
            // it is kept unchanged so that callers already catching it keep working.
            if (!System.IO.Directory.Exists(indexPathLead))
            {
                throw new NullReferenceException("Index Does Not Exist");
            }

            List <Article> results          = new List <Article>();
            List <Article> CompleteResults2 = new List <Article>();

            // Nothing to search for: return before any index resource is opened, so there
            // is nothing left to clean up on this path.
            if (string.IsNullOrEmpty(searchTerm) || fields == null || fields.Length == 0)
            {
                return(CompleteResults2);
            }

            // The same term is run against both indexes, so it is parsed only once.
            MultiFieldQueryParser allFieldsSearcher =
                new MultiFieldQueryParser(LuceneVersion, fields, analyzer);
            Query query = allFieldsSearcher.Parse(searchTerm);

            luceneLeaderIndexDirectory = FSDirectory.Open(indexPathLead);
            try
            {
                Article leader = new Article();

                // Stage 1: the single best hit of the leader index becomes the leader.
                using (IndexSearcher searcher = new IndexSearcher(luceneLeaderIndexDirectory))
                {
                    TopScoreDocCollector topScoreDocCollector = TopScoreDocCollector.Create(1, true);
                    searcher.Search(query, topScoreDocCollector);
                    ScoreDoc[] hits = topScoreDocCollector.TopDocs().ScoreDocs;

                    // No match means no leader and therefore no followers either.
                    if (hits == null || hits.Length == 0)
                    {
                        return(CompleteResults2);
                    }

                    float    score    = hits[0].Score;
                    Document document = searcher.Doc(hits[0].Doc);

                    // Formatting and re-parsing rounds the score to four decimal places.
                    leader.Score = Convert.ToDouble(score.ToString("0.0000"));
                    leader.Id    = Convert.ToInt32(document.Get("ID"));
                    results.Add(leader);
                }

                // The follower index must exist as well (same exception contract as above).
                if (!System.IO.Directory.Exists(indexPathFollower))
                {
                    throw new NullReferenceException("Index Does Not Exist");
                }

                // Stage 2: search the followers and keep those that belong to the leader.
                luceneIndexDirectoryFollowers = FSDirectory.Open(indexPathFollower);
                try
                {
                    using (IndexSearcher searcherFollowers = new IndexSearcher(luceneIndexDirectoryFollowers))
                    {
                        // Collect up to 3200 follower hits.
                        TopScoreDocCollector topScoreDocCollectorFollowers = TopScoreDocCollector.Create(3200, true);
                        searcherFollowers.Search(query, topScoreDocCollectorFollowers);
                        ScoreDoc[] Followershits = topScoreDocCollectorFollowers.TopDocs().ScoreDocs;

                        foreach (ScoreDoc hitFollow in Followershits)
                        {
                            Document documentFollower = searcherFollowers.Doc(hitFollow.Doc);

                            Article art = new Article();
                            art.Score = Convert.ToDouble(hitFollow.Score.ToString("0.0000"));
                            art.Id    = Convert.ToInt32(documentFollower.Get("ID"));
                            int leaderID = Convert.ToInt32(documentFollower.Get("Leader"));

                            if (leaderID == leader.Id)
                            {
                                results.Add(art);
                            }
                        }
                    }
                }
                finally
                {
                    luceneIndexDirectoryFollowers.Dispose();
                }

                // Attach the full article to every hit and copy the hit's score onto it.
                IEnumerable <Article> Articles = ArticleReader.ReadArticles(@"Data\cacm.all");
                foreach (Article res in results)
                {
                    string wantedId = res.Id.ToString();

                    // Deliberately no early exit: every article with a matching ID is added.
                    foreach (Article article in Articles)
                    {
                        if (article.Id.ToString() == wantedId)
                        {
                            article.Score = res.Score;
                            CompleteResults2.Add(article);
                        }
                    }
                }

                return(CompleteResults2);
            }
            finally
            {
                // Runs on every exit path, including exceptions and the early return above.
                luceneLeaderIndexDirectory.Dispose();
            }
        }
        /// <summary>
        /// Indexes a random number of documents through an <see cref="NRTCachingDirectory"/>,
        /// occasionally (re)opening a near-real-time reader, and then verifies that disposing
        /// the writer leaves no cached files behind and that every document id is present
        /// exactly once in the underlying directory.
        /// </summary>
        public virtual void TestNRTAndCommit()
        {
            Directory           baseDir      = NewDirectory();
            NRTCachingDirectory cachingDir   = new NRTCachingDirectory(baseDir, 2.0, 25.0);
            MockAnalyzer        mockAnalyzer = new MockAnalyzer(Random);

            mockAnalyzer.MaxTokenLength = TestUtil.NextInt32(Random, 1, IndexWriter.MAX_TERM_LENGTH);

            IndexWriterConfig writerConfig = NewIndexWriterConfig(TEST_VERSION_CURRENT, mockAnalyzer);
            RandomIndexWriter writer       = new RandomIndexWriter(Random, cachingDir, writerConfig);
            LineFileDocs      lineDocs     = new LineFileDocs(Random, DefaultCodecSupportsDocValues);
            int               numDocs      = TestUtil.NextInt32(Random, 100, 400);

            if (VERBOSE)
            {
                Console.WriteLine("TEST: numDocs=" + numDocs);
            }

            IList <BytesRef> docIds    = new List <BytesRef>();
            DirectoryReader  nrtReader = null;

            // "added" is the number of documents written once the current one is in.
            for (int added = 1; added <= numDocs; added++)
            {
                Document nextDoc = lineDocs.NextDoc();
                docIds.Add(new BytesRef(nextDoc.Get("docid")));
                writer.AddDocument(nextDoc);

                // Only refresh the NRT view on roughly one document in twenty.
                if (Random.Next(20) != 17)
                {
                    continue;
                }

                if (nrtReader == null)
                {
                    nrtReader = DirectoryReader.Open(writer.IndexWriter, false);
                }
                else
                {
                    DirectoryReader refreshed = DirectoryReader.OpenIfChanged(nrtReader);
                    if (refreshed != null)
                    {
                        nrtReader.Dispose();
                        nrtReader = refreshed;
                    }
                }

                Assert.AreEqual(added, nrtReader.NumDocs);

                // Smoke test only: the hit count may legitimately be 0, so the
                // result is not inspected.
                NewSearcher(nrtReader).Search(new TermQuery(new Term("body", "the")), 10);
            }

            nrtReader?.Dispose();

            // Disposing the writer syncs all files, which should empty the cache.
            writer.Dispose();

            string[] leftovers = cachingDir.ListCachedFiles();
            for (int i = 0; i < leftovers.Length; i++)
            {
                Console.WriteLine("FAIL: cached file " + leftovers[i] + " remains after sync");
            }
            Assert.AreEqual(0, leftovers.Length);

            // Re-read from the wrapped directory: every id must occur exactly once.
            DirectoryReader committedReader = DirectoryReader.Open(baseDir);
            foreach (BytesRef docId in docIds)
            {
                Assert.AreEqual(1, committedReader.DocFreq(new Term("docid", docId)));
            }

            committedReader.Dispose();
            cachingDir.Dispose();
            lineDocs.Dispose();
        }
Beispiel #29
0
        /// <summary>
        /// Indexes seven small documents (six with an "author" group value, one without) across
        /// two segments and checks the group count reported by the all-groups collector for
        /// three different content terms.
        /// </summary>
        public void TestTotalGroupCount()
        {
            string    groupField = "author";
            FieldType storedOnly = new FieldType { IsStored = true };

            Directory         dir = NewDirectory();
            RandomIndexWriter w   = new RandomIndexWriter(
                Random,
                dir,
                NewIndexWriterConfig(TEST_VERSION_CURRENT,
                                     new MockAnalyzer(Random)).SetMergePolicy(NewLogMergePolicy()));
            bool canUseIDV = !"Lucene3x".Equals(w.IndexWriter.Config.Codec.Name, StringComparison.Ordinal);

            // One row per document: { author (null = no author field), content, id }.
            string[][] rows =
            {
                new string[] { "author1", "random text", "1" },                          // 0
                new string[] { "author1", "some more random text blob", "2" },           // 1
                new string[] { "author1", "some more random textual data", "3" },        // 2
                new string[] { "author2", "some random text", "4" },                     // 3
                new string[] { "author3", "some more random text", "5" },                // 4
                new string[] { "author3", "random blob", "6" },                          // 5
                new string[] { null, "random word stuck in alot of other text", "6" }    // 6 -- no author field
            };

            for (int rowIndex = 0; rowIndex < rows.Length; rowIndex++)
            {
                string author  = rows[rowIndex][0];
                string content = rows[rowIndex][1];
                string id      = rows[rowIndex][2];

                Document doc = new Document();
                if (author != null)
                {
                    AddGroupField(doc, groupField, author, canUseIDV);
                }
                doc.Add(new TextField("content", content, Field.Store.YES));
                doc.Add(new Field("id", id, storedOnly));
                w.AddDocument(doc);

                if (rowIndex == 2)
                {
                    w.Commit(); // To ensure a second segment
                }
            }

            IndexSearcher indexSearcher = NewSearcher(w.GetReader());

            w.Dispose();

            // Content term -> number of distinct groups expected among its matches.
            string[] terms          = { "random", "some", "blob" };
            int[]    expectedGroups = { 4, 3, 2 };

            for (int t = 0; t < terms.Length; t++)
            {
                IAbstractAllGroupsCollector <object> allGroupsCollector = CreateRandomCollector(groupField, canUseIDV);

                indexSearcher.Search(new TermQuery(new Term("content", terms[t])), allGroupsCollector);
                assertEquals(expectedGroups[t], allGroupsCollector.GroupCount);
            }

            indexSearcher.IndexReader.Dispose();
            dir.Dispose();
        }
Beispiel #30
0
        /// <summary>
        /// Indexes three documents that each add one more facet dimension ("a", "b", "c")
        /// than the previous one, then checks the per-dimension sums of the "num" field
        /// returned by <see cref="TaxonomyFacetSumValueSource"/>.
        /// </summary>
        public virtual void TestSparseFacets()
        {
            Store.Directory dir     = NewDirectory();
            Store.Directory taxoDir = NewDirectory();

            // Facet ordinals live in their own directory, apart from the main index.
            DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE);

            RandomIndexWriter writer = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, dir);
            FacetsConfig config = new FacetsConfig();

            // Document i gets nums[i] plus the facets dims[i][k] -> labels[i][k].
            int[]      nums   = { 10, 20, 30 };
            string[][] dims   =
            {
                new string[] { "a" },
                new string[] { "a", "b" },
                new string[] { "a", "b", "c" }
            };
            string[][] labels =
            {
                new string[] { "foo1" },
                new string[] { "foo2", "bar1" },
                new string[] { "foo3", "bar2", "baz1" }
            };

            for (int i = 0; i < nums.Length; i++)
            {
                Document doc = new Document();
                doc.Add(new Int32Field("num", nums[i], Field.Store.NO));
                for (int k = 0; k < dims[i].Length; k++)
                {
                    doc.Add(new FacetField(dims[i][k], labels[i][k]));
                }
                writer.AddDocument(config.Build(taxoWriter, doc));

                // Randomly commit between documents, but never after the last one.
                if (i < nums.Length - 1 && Random.NextBoolean())
                {
                    writer.Commit();
                }
            }

            // NRT open
            IndexSearcher searcher = NewSearcher(writer.GetReader());

            writer.Dispose();

            // NRT open
            DirectoryTaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter);

            taxoWriter.Dispose();

            FacetsCollector collector = new FacetsCollector();

            searcher.Search(new MatchAllDocsQuery(), collector);

            TaxonomyFacetSumValueSource facets = new TaxonomyFacetSumValueSource(taxoReader, new FacetsConfig(), collector, new Int32FieldSource("num"));

            // Top 10 labels of every dimension that has any counts.
            IList <FacetResult> results = facets.GetAllDims(10);

            string[] expected =
            {
                "dim=a path=[] value=60.0 childCount=3\n  foo3 (30.0)\n  foo2 (20.0)\n  foo1 (10.0)\n",
                "dim=b path=[] value=50.0 childCount=2\n  bar2 (30.0)\n  bar1 (20.0)\n",
                "dim=c path=[] value=30.0 childCount=1\n  baz1 (30.0)\n"
            };

            Assert.AreEqual(3, results.Count);
            for (int i = 0; i < expected.Length; i++)
            {
                Assert.AreEqual(expected[i], results[i].ToString());
            }

            IOUtils.Dispose(searcher.IndexReader, taxoReader, dir, taxoDir);
        }
Beispiel #31
0
        /// <summary>
        /// Repeatedly replaces a fixed-size set of documents whose "docid" values cycle through
        /// 0..SIZE-1, either with <c>UpdateDocument</c> or, when a searcher is available, with
        /// <c>TryDeleteDocument</c> followed by <c>AddDocument</c>. A near-real-time reader is
        /// reopened at random points along the way. Afterwards the test asserts that the writer
        /// holds exactly SIZE documents, that no unreferenced files are left in the directory,
        /// and that the per-segment sizes add up to the size of all non-"segments" files.
        /// </summary>
        public virtual void TestRollingUpdates_Mem()
        {
            Random random             = new Random(Random.Next());
            BaseDirectoryWrapper dir  = NewDirectory();
            LineFileDocs         docs = new LineFileDocs(random, DefaultCodecSupportsDocValues);

            //provider.register(new MemoryCodec());
            // Unless the default codec is "Lucene3x", randomly switch the default codec to one
            // that always uses MemoryPostingsFormat.
            if ((!"Lucene3x".Equals(Codec.Default.Name, StringComparison.Ordinal)) && LuceneTestCase.Random.NextBoolean())
            {
                Codec.Default =
                    TestUtil.AlwaysPostingsFormat(new MemoryPostingsFormat(LuceneTestCase.Random.nextBoolean(), random.NextSingle()));
            }

            MockAnalyzer analyzer = new MockAnalyzer(LuceneTestCase.Random);

            analyzer.MaxTokenLength = TestUtil.NextInt32(LuceneTestCase.Random, 1, IndexWriter.MAX_TERM_LENGTH);

            IndexWriter   w          = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
            int           SIZE       = AtLeast(20);  // number of distinct document ids kept in the index
            int           id         = 0;            // next id to (re)write; wraps around at SIZE - 1
            IndexReader   r          = null;         // current NRT reader, if one has been opened
            IndexSearcher s          = null;         // searcher over r; only set when r was opened with deletions applied
            // Between 2x and 7x SIZE updates normally, between 2x and 202x SIZE on nightly runs.
            int           numUpdates = (int)(SIZE * (2 + (TEST_NIGHTLY ? 200 * LuceneTestCase.Random.NextDouble() : 5 * LuceneTestCase.Random.NextDouble())));

            if (VERBOSE)
            {
                Console.WriteLine("TEST: numUpdates=" + numUpdates);
            }
            int updateCount = 0;  // updates performed since the last reader reopen

            // TODO: sometimes update ids not in order...
            for (int docIter = 0; docIter < numUpdates; docIter++)
            {
                Documents.Document doc  = docs.NextDoc();
                string             myID = "" + id;
                // Advance the id for the next iteration, wrapping back to 0 after SIZE - 1.
                if (id == SIZE - 1)
                {
                    id = 0;
                }
                else
                {
                    id++;
                }
                if (VERBOSE)
                {
                    // Note: id has already been advanced here, so this prints the NEXT id, not myID.
                    Console.WriteLine("  docIter=" + docIter + " id=" + id);
                }
                ((Field)doc.GetField("docid")).SetStringValue(myID);

                Term idTerm = new Term("docid", myID);

                bool doUpdate;
                // With a searcher available (and fewer than SIZE updates since the last reopen),
                // locate the existing document and try to delete it directly by doc number.
                if (s != null && updateCount < SIZE)
                {
                    TopDocs hits = s.Search(new TermQuery(idTerm), 1);
                    Assert.AreEqual(1, hits.TotalHits);
                    // If the direct delete fails, fall back to a regular UpdateDocument below.
                    doUpdate = !w.TryDeleteDocument(r, hits.ScoreDocs[0].Doc);
                    if (VERBOSE)
                    {
                        if (doUpdate)
                        {
                            Console.WriteLine("  tryDeleteDocument failed");
                        }
                        else
                        {
                            Console.WriteLine("  tryDeleteDocument succeeded");
                        }
                    }
                }
                else
                {
                    doUpdate = true;
                    if (VERBOSE)
                    {
                        Console.WriteLine("  no searcher: doUpdate=true");
                    }
                }

                updateCount++;

                if (doUpdate)
                {
                    // Delete-by-term plus add in one call.
                    w.UpdateDocument(idTerm, doc);
                }
                else
                {
                    // The old version was already removed by TryDeleteDocument; just add the new one.
                    w.AddDocument(doc);
                }

                // Once every id has been written at least once, reopen the reader on
                // roughly one iteration in fifty.
                if (docIter >= SIZE && LuceneTestCase.Random.Next(50) == 17)
                {
                    if (r != null)
                    {
                        r.Dispose();
                    }

                    bool applyDeletions = LuceneTestCase.Random.NextBoolean();

                    if (VERBOSE)
                    {
                        Console.WriteLine("TEST: reopen applyDeletions=" + applyDeletions);
                    }

                    r = w.GetReader(applyDeletions);
                    // A searcher is only kept when the reader was opened with deletions applied;
                    // otherwise the next iterations take the plain UpdateDocument path.
                    if (applyDeletions)
                    {
                        s = NewSearcher(r);
                    }
                    else
                    {
                        s = null;
                    }
                    // With deletions applied the reader must see exactly SIZE live documents.
                    Assert.IsTrue(!applyDeletions || r.NumDocs == SIZE, "applyDeletions=" + applyDeletions + " r.NumDocs=" + r.NumDocs + " vs SIZE=" + SIZE);
                    updateCount = 0;
                }
            }

            if (r != null)
            {
                r.Dispose();
            }

            w.Commit();
            Assert.AreEqual(SIZE, w.NumDocs);

            w.Dispose();

            TestIndexWriter.AssertNoUnreferencedFiles(dir, "leftover files after rolling updates");

            docs.Dispose();

            // LUCENE-4455:
            // The sizes reported per segment must add up to the combined length of all
            // files in the directory whose name does not start with the segments prefix.
            SegmentInfos infos = new SegmentInfos();

            infos.Read(dir);
            long totalBytes = 0;

            foreach (SegmentCommitInfo sipc in infos.Segments)
            {
                totalBytes += sipc.GetSizeInBytes();
            }
            long totalBytes2 = 0;

            foreach (string fileName in dir.ListAll())
            {
                if (!fileName.StartsWith(IndexFileNames.SEGMENTS, StringComparison.Ordinal))
                {
                    totalBytes2 += dir.FileLength(fileName);
                }
            }
            Assert.AreEqual(totalBytes2, totalBytes);
            dir.Dispose();
        }
Beispiel #32
0
        /// <summary>
        /// Indexes five documents, each with a numeric "num" field and an "Author" facet, and
        /// verifies that <see cref="TaxonomyFacetSumValueSource"/> reports the per-author sums
        /// of "num" in descending order.
        /// </summary>
        public virtual void TestBasic()
        {
            Store.Directory dir     = NewDirectory();
            Store.Directory taxoDir = NewDirectory();

            // Facet ordinals live in their own directory, apart from the main index.
            DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE);

            RandomIndexWriter writer = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, dir);
            FacetsConfig config = new FacetsConfig();

            // Document i carries nums[i] and is attributed to authors[i].
            int[]    nums    = { 10, 20, 30, 40, 45 };
            string[] authors = { "Bob", "Lisa", "Lisa", "Susan", "Frank" };

            for (int i = 0; i < nums.Length; i++)
            {
                Document doc = new Document();
                doc.Add(new Int32Field("num", nums[i], Field.Store.NO));
                doc.Add(new FacetField("Author", authors[i]));
                writer.AddDocument(config.Build(taxoWriter, doc));
            }

            // NRT open
            IndexSearcher searcher = NewSearcher(writer.GetReader());

            writer.Dispose();

            // NRT open
            DirectoryTaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter);

            taxoWriter.Dispose();

            // Collect every document: MatchAllDocsQuery "browses" the whole index. A real
            // application would normally run an ordinary query here instead.
            FacetsCollector collector = new FacetsCollector();
            searcher.Search(new MatchAllDocsQuery(), collector);

            TaxonomyFacetSumValueSource facets = new TaxonomyFacetSumValueSource(taxoReader, new FacetsConfig(), collector, new Int32FieldSource("num"));

            // Lisa appears twice (20 + 30), everyone else once.
            string topAuthors = facets.GetTopChildren(10, "Author").ToString();
            Assert.AreEqual("dim=Author path=[] value=145.0 childCount=4\n  Lisa (50.0)\n  Frank (45.0)\n  Susan (40.0)\n  Bob (10.0)\n", topAuthors);

            taxoReader.Dispose();
            searcher.IndexReader.Dispose();
            dir.Dispose();
            taxoDir.Dispose();
        }
Beispiel #33
0
        /// <summary>
        /// Split a given index into 3 indexes for training, test and cross validation tasks respectively
        /// </summary>
        /// <remarks>
        /// Every document returned by a <see cref="MatchAllDocsQuery"/> over <paramref name="originalIndex"/> is
        /// rebuilt with stored, term-vector-enabled fields and routed to exactly one target index: every other
        /// document goes to the test index while that index holds fewer than <c>MaxDoc * _testRatio</c> documents,
        /// otherwise to the cross validation index while it holds fewer than <c>MaxDoc * _crossValidationRatio</c>
        /// documents, and to the training index in all remaining cases.
        /// All three writers are committed and disposed before the method returns, also when copying fails.
        /// </remarks>
        /// <param name="originalIndex">an <see cref="AtomicReader"/> on the source index</param>
        /// <param name="trainingIndex">a <see cref="Directory"/> used to write the training index</param>
        /// <param name="testIndex">a <see cref="Directory"/> used to write the test index</param>
        /// <param name="crossValidationIndex">a <see cref="Directory"/> used to write the cross validation index</param>
        /// <param name="analyzer"><see cref="Analyzer"/> used to create the new docs</param>
        /// <param name="fieldNames">names of fields that need to be put in the new indexes or <c>null</c> if all should be used</param>
        /// <exception cref="IOException">if any writing operation fails on any of the indexes</exception>
        public virtual void Split(AtomicReader originalIndex, Directory trainingIndex, Directory testIndex, Directory crossValidationIndex, Analyzer analyzer, params string[] fieldNames)
        {
#pragma warning disable 612, 618
            // create IWs for train / test / cv IDXs
            IndexWriter testWriter     = new IndexWriter(testIndex, new IndexWriterConfig(LuceneVersion.LUCENE_CURRENT, analyzer));
            IndexWriter cvWriter       = new IndexWriter(crossValidationIndex, new IndexWriterConfig(LuceneVersion.LUCENE_CURRENT, analyzer));
            IndexWriter trainingWriter = new IndexWriter(trainingIndex, new IndexWriterConfig(LuceneVersion.LUCENE_CURRENT, analyzer));
#pragma warning restore 612, 618

            try
            {
                try
                {
                    int size = originalIndex.MaxDoc;

                    IndexSearcher indexSearcher = new IndexSearcher(originalIndex);
                    TopDocs       topDocs       = indexSearcher.Search(new MatchAllDocsQuery(), int.MaxValue);

                    // set the type to be indexed, stored, with term vectors
                    FieldType ft = new FieldType(TextField.TYPE_STORED);
                    ft.StoreTermVectors         = true;
                    ft.StoreTermVectorOffsets   = true;
                    ft.StoreTermVectorPositions = true;

                    // running document counter; its parity alternates documents between the test index and the others
                    int b = 0;

                    // iterate over existing documents
                    foreach (ScoreDoc scoreDoc in topDocs.ScoreDocs)
                    {
                        // load the stored fields once per hit instead of once per copied field
                        Document source = originalIndex.Document(scoreDoc.Doc);

                        // create a new document for indexing
                        Document doc = new Document();
                        if (fieldNames != null && fieldNames.Length > 0)
                        {
                            foreach (string fieldName in fieldNames)
                            {
                                doc.Add(new Field(fieldName, source.GetField(fieldName).ToString(), ft));
                            }
                        }
                        else
                        {
                            foreach (IIndexableField storableField in source.Fields)
                            {
                                if (storableField.GetReaderValue() != null)
                                {
                                    doc.Add(new Field(storableField.Name, storableField.GetReaderValue(), ft));
                                }
                                else if (storableField.GetBinaryValue() != null)
                                {
                                    doc.Add(new Field(storableField.Name, storableField.GetBinaryValue(), ft));
                                }
                                else if (storableField.GetStringValue() != null)
                                {
                                    doc.Add(new Field(storableField.Name, storableField.GetStringValue(), ft));
                                }
                                else if (storableField.NumericType != NumericFieldType.NONE) // LUCENENET specific - checking the NumricType property is quicker than the type conversion
                                {
                                    // LUCENENET specific - need to pass invariant culture here (we are assuming the Field will be stored)
                                    // and we need to round-trip floating point numbers so we don't lose precision.
                                    if (storableField.NumericType == NumericFieldType.SINGLE || storableField.NumericType == NumericFieldType.DOUBLE)
                                    {
                                        // LUCENENET: Need to specify the "R" for round-trip: http://stackoverflow.com/a/611564
                                        doc.Add(new Field(storableField.Name, storableField.GetStringValue("R", CultureInfo.InvariantCulture), ft));
                                    }
                                    else
                                    {
                                        doc.Add(new Field(storableField.Name, storableField.GetStringValue(CultureInfo.InvariantCulture), ft));
                                    }
                                }
                            }
                        }

                        // add it to one of the IDXs
                        if (b % 2 == 0 && testWriter.MaxDoc < size * _testRatio)
                        {
                            testWriter.AddDocument(doc);
                        }
                        else if (cvWriter.MaxDoc < size * _crossValidationRatio)
                        {
                            cvWriter.AddDocument(doc);
                        }
                        else
                        {
                            trainingWriter.AddDocument(doc);
                        }
                        b++;
                    }
                }
                catch (Exception e)
                {
                    throw new IOException("Exception in DatasetSplitter", e);
                }
                finally
                {
                    // commit whatever was written, even after a failure
                    testWriter.Commit();
                    cvWriter.Commit();
                    trainingWriter.Commit();
                }
            }
            finally
            {
                // close IWs; the nesting guarantees that a failing commit or dispose
                // never prevents the remaining writers from being released
                try
                {
                    testWriter.Dispose();
                }
                finally
                {
                    try
                    {
                        cvWriter.Dispose();
                    }
                    finally
                    {
                        trainingWriter.Dispose();
                    }
                }
            }
        }
        public virtual void TestRandom()
        {
            // Builds between 3 and 6 index contexts and runs 100 searches against each one,
            // comparing the distinct-values collector output with the result of CreateExpectedResult.
            Random rng      = Random;
            int    runCount = TestUtil.NextInt32(rng, 3, 6);

            for (int indexPass = 0; indexPass < runCount; indexPass++)
            {
                IndexContext ctx = CreateIndexContext();
                for (int searchPass = 0; searchPass < 100; searchPass++)
                {
                    IndexSearcher indexSearcher = NewSearcher(ctx.indexReader);

                    // Randomly choose between the index's doc-values type and NONE for the collectors.
                    DocValuesType dvType = DocValuesType.NONE;
                    if (ctx.dvType != DocValuesType.NONE && rng.nextBoolean())
                    {
                        dvType = ctx.dvType;
                    }

                    string term      = ctx.contentStrings[rng.nextInt(ctx.contentStrings.Length)];
                    Sort   groupSort = new Sort(new SortField("id", SortFieldType.STRING));
                    int    topN      = 1 + rng.nextInt(10);

                    List<AbstractDistinctValuesCollector.IGroupCount<IComparable>> expectedGroups =
                        CreateExpectedResult(ctx, term, groupSort, topN);

                    // First pass: collect the top groups for the term.
                    IAbstractFirstPassGroupingCollector<IComparable> firstPass =
                        CreateRandomFirstPassCollector(dvType, groupSort, groupField, topN);
                    indexSearcher.Search(new TermQuery(new Term("content", term)), firstPass);

                    // Second pass: collect the distinct count-field values per group.
                    IAbstractDistinctValuesCollector<AbstractDistinctValuesCollector.IGroupCount<IComparable>> secondPass =
                        CreateDistinctCountCollector(firstPass, groupField, countField, dvType);
                    indexSearcher.Search(new TermQuery(new Term("content", term)), secondPass);

                    // LUCENENET TODO: Try to work out how to do this without an O(n) operation
                    List<AbstractDistinctValuesCollector.IGroupCount<IComparable>> actualGroups =
                        new List<AbstractDistinctValuesCollector.IGroupCount<IComparable>>(secondPass.Groups);

                    if (Verbose)
                    {
                        Console.WriteLine("Index iter=" + indexPass);
                        Console.WriteLine("Search iter=" + searchPass);
                        Console.WriteLine("1st pass collector class name=" + firstPass.GetType().Name);
                        Console.WriteLine("2nd pass collector class name=" + secondPass.GetType().Name);
                        Console.WriteLine("Search term=" + term);
                        Console.WriteLine("DVType=" + dvType);
                        Console.WriteLine("1st pass groups=" + firstPass.GetTopGroups(0, false).toString());
                        Console.WriteLine("Expected:");
                        PrintGroups(expectedGroups);
                        Console.WriteLine("Actual:");
                        PrintGroups(actualGroups);
                        Console.Out.Flush();
                    }

                    assertEquals(expectedGroups.Count, actualGroups.Count);
                    for (int g = 0; g < expectedGroups.Count; g++)
                    {
                        AbstractDistinctValuesCollector.IGroupCount<IComparable> expectedGroup = expectedGroups[g];
                        AbstractDistinctValuesCollector.IGroupCount<IComparable> actualGroup   = actualGroups[g];
                        AssertValues(expectedGroup.GroupValue, actualGroup.GroupValue);
                        assertEquals(expectedGroup.UniqueValues.Count(), actualGroup.UniqueValues.Count());

                        // The unique values are compared after sorting both sides with the same comparer.
                        List<IComparable> sortedExpected = new List<IComparable>(expectedGroup.UniqueValues);
                        sortedExpected.Sort(nullComparer);
                        List<IComparable> sortedActual = new List<IComparable>(actualGroup.UniqueValues);
                        sortedActual.Sort(nullComparer);
                        for (int v = 0; v < sortedExpected.Count; v++)
                        {
                            AssertValues(sortedExpected[v], sortedActual[v]);
                        }
                    }
                }
                ctx.indexReader.Dispose();
                ctx.directory.Dispose();
            }
        }
Beispiel #35
0
 /// <summary>
 /// Runs <paramref name="q"/> through the shared searcher, filtered by a
 /// <see cref="RevisionFilter"/> built from the two revision bounds.
 /// </summary>
 public static Hits Search(Query q, int revFirst, int revLast)
 {
     RevisionFilter revisionRange = new RevisionFilter(revFirst, revLast);
     return Searcher.Search(q, revisionRange);
 }
        public void TestBasics()
        {
            // Index four small documents and check which ones CommonTermsQuery returns,
            // and in which order, for different term sets and occur settings.
            Directory         directory    = NewDirectory();
            MockAnalyzer      mockAnalyzer = new MockAnalyzer(Random);
            RandomIndexWriter indexWriter  = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, directory, mockAnalyzer);
            string[] contents =
            {
                "this is the end of the world right",
                "is this it or maybe not",
                "this is the end of the universe as we know it",
                "there is the famous restaurant at the end of the universe"
            };

            for (int id = 0; id < contents.Length; id++)
            {
                Document document = new Document();
                document.Add(NewStringField("id", "" + id, Field.Store.YES));
                document.Add(NewTextField("field", contents[id], Field.Store.NO));
                indexWriter.AddDocument(document);
            }

            IndexReader   reader   = indexWriter.GetReader();
            IndexSearcher searcher = NewSearcher(reader);

            // Six terms, both occurs SHOULD: three documents expected, ids 0, 2, 3 in that order.
            {
                CommonTermsQuery q = new CommonTermsQuery(Occur.SHOULD, Occur.SHOULD, Random.NextBoolean() ? 2F : 0.5F);
                q.Add(new Term("field", "is"));
                q.Add(new Term("field", "this"));
                q.Add(new Term("field", "end"));
                q.Add(new Term("field", "world"));
                q.Add(new Term("field", "universe"));
                q.Add(new Term("field", "right"));
                TopDocs hits = searcher.Search(q, 10);
                assertEquals(hits.TotalHits, 3);
                assertEquals("0", reader.Document(hits.ScoreDocs[0].Doc).Get("id"));
                assertEquals("2", reader.Document(hits.ScoreDocs[1].Doc).Get("id"));
                assertEquals("3", reader.Document(hits.ScoreDocs[2].Doc).Get("id"));
            }

            // Three terms, both occurs SHOULD: two documents expected, ids 0 then 2.
            {
                CommonTermsQuery q = new CommonTermsQuery(Occur.SHOULD, Occur.SHOULD, Random.NextBoolean() ? 2F : 0.5F);
                q.Add(new Term("field", "is"));
                q.Add(new Term("field", "this"));
                q.Add(new Term("field", "end"));
                TopDocs hits = searcher.Search(q, 10);
                assertEquals(hits.TotalHits, 2);
                assertEquals("0", reader.Document(hits.ScoreDocs[0].Doc).Get("id"));
                assertEquals("2", reader.Document(hits.ScoreDocs[1].Doc).Get("id"));
            }

            // Second occur switched to MUST, four terms: only document 0 expected.
            {
                CommonTermsQuery q = new CommonTermsQuery(Occur.SHOULD, Occur.MUST, Random.NextBoolean() ? 2F : 0.5F);
                q.Add(new Term("field", "is"));
                q.Add(new Term("field", "this"));
                q.Add(new Term("field", "end"));
                q.Add(new Term("field", "world"));
                TopDocs hits = searcher.Search(q, 10);
                assertEquals(hits.TotalHits, 1);
                assertEquals("0", reader.Document(hits.ScoreDocs[0].Doc).Get("id"));
            }

            // Second occur MUST, two terms: only document 3 expected.
            {
                CommonTermsQuery q = new CommonTermsQuery(Occur.SHOULD, Occur.MUST, Random.NextBoolean() ? 2F : 0.5F);
                q.Add(new Term("field", "restaurant"));
                q.Add(new Term("field", "universe"));
                TopDocs hits = searcher.Search(q, 10);
                assertEquals(hits.TotalHits, 1);
                assertEquals("3", reader.Document(hits.ScoreDocs[0].Doc).Get("id"));
            }

            reader.Dispose();
            indexWriter.Dispose();
            directory.Dispose();
        }
        public virtual void TestWrongIndexFieldName()
        {
            // Dimension "a" is indexed into "$facets2", but the Facets instances below are built
            // without naming that field, so no counts are expected and lookups on "a" must throw.
            Store.Directory indexDir = NewDirectory();
            Store.Directory taxoDir  = NewDirectory();

            // Writes facet ords to a separate directory from the
            // main index:
            DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE);

            FacetsConfig facetsConfig = new FacetsConfig();
            facetsConfig.SetIndexFieldName("a", "$facets2");

            RandomIndexWriter indexWriter = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, indexDir);

            Document document = new Document();
            document.Add(new FacetField("a", "foo1"));
            indexWriter.AddDocument(facetsConfig.Build(taxoWriter, document));

            // NRT open
            IndexSearcher searcher = NewSearcher(indexWriter.GetReader());

            // NRT open
            var taxoReader = new DirectoryTaxonomyReader(taxoWriter);

            FacetsCollector collector = new FacetsCollector();
            searcher.Search(new MatchAllDocsQuery(), collector);

            // Uses default $facets field:
            Facets facets;
            if (Random.NextBoolean())
            {
                facets = new FastTaxonomyFacetCounts(taxoReader, facetsConfig, collector);
            }
            else
            {
                OrdinalsReader ordinals = new DocValuesOrdinalsReader();
                if (Random.NextBoolean())
                {
                    ordinals = new CachedOrdinalsReader(ordinals);
                }
                facets = new TaxonomyFacetCounts(ordinals, taxoReader, facetsConfig, collector);
            }

            // Ask for top 10 labels for any dims that have counts:
            IList<FacetResult> allDims = facets.GetAllDims(10);
            Assert.True(allDims.Count == 0);

            // Both lookups on dimension "a" are expected to fail with an ArgumentException.
            try
            {
                facets.GetSpecificValue("a");
                fail("should have hit exc");
            }
            catch (System.ArgumentException)
            {
                // expected
            }

            try
            {
                facets.GetTopChildren(10, "a");
                fail("should have hit exc");
            }
            catch (System.ArgumentException)
            {
                // expected
            }

            IOUtils.Dispose(indexWriter, taxoWriter, searcher.IndexReader, taxoReader, taxoDir, indexDir);
        }
        public virtual void TestTransitionAPI()
        {
            // Indexes one document built with the older Field constructors (Store / Index / TermVector
            // arguments) and verifies stored values, searchability and term vectors.
            Directory directory = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random(), directory, Similarity, TimeZone);

            Documents.Document doc = new Documents.Document();
            doc.Add(new Field("stored", "abc", Field.Store.YES, Field.Index.NO));
            doc.Add(new Field("stored_indexed", "abc xyz", Field.Store.YES, Field.Index.NOT_ANALYZED));
            doc.Add(new Field("stored_tokenized", "abc xyz", Field.Store.YES, Field.Index.ANALYZED));
            doc.Add(new Field("indexed", "abc xyz", Field.Store.NO, Field.Index.NOT_ANALYZED));
            doc.Add(new Field("tokenized", "abc xyz", Field.Store.NO, Field.Index.ANALYZED));
            doc.Add(new Field("tokenized_reader", new StringReader("abc xyz")));
            doc.Add(new Field("tokenized_tokenstream", writer.w.Analyzer.TokenStream("tokenized_tokenstream", new StringReader("abc xyz"))));
            doc.Add(new Field("binary", new byte[10]));
            doc.Add(new Field("tv", "abc xyz", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES));
            doc.Add(new Field("tv_pos", "abc xyz", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS));
            doc.Add(new Field("tv_off", "abc xyz", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_OFFSETS));
            doc.Add(new Field("tv_pos_off", "abc xyz", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
            writer.AddDocument(doc);
            IndexReader reader = writer.Reader;
            writer.Dispose();

            // Read the document back: 4 stored fields
            doc = reader.Document(0);
            Assert.AreEqual(4, doc.Fields.Count);
            Assert.AreEqual("abc", doc.Get("stored"));
            Assert.AreEqual("abc xyz", doc.Get("stored_indexed"));
            Assert.AreEqual("abc xyz", doc.Get("stored_tokenized"));
            BytesRef binaryValue = doc.GetBinaryValue("binary");
            Assert.IsNotNull(binaryValue);
            Assert.AreEqual(10, binaryValue.Length);

            // Each (field, term) pair must match exactly the one indexed document.
            IndexSearcher searcher = new IndexSearcher(reader);
            string[][] singleHitTerms =
            {
                new[] { "stored_indexed", "abc xyz" },
                new[] { "stored_tokenized", "abc" },
                new[] { "stored_tokenized", "xyz" },
                new[] { "indexed", "abc xyz" },
                new[] { "tokenized", "abc" },
                new[] { "tokenized", "xyz" },
                new[] { "tokenized_reader", "abc" },
                new[] { "tokenized_reader", "xyz" },
                new[] { "tokenized_tokenstream", "abc" },
                new[] { "tokenized_tokenstream", "xyz" }
            };
            foreach (string[] fieldAndTerm in singleHitTerms)
            {
                Assert.AreEqual(1, searcher.Search(new TermQuery(new Term(fieldAndTerm[0], fieldAndTerm[1])), 1).TotalHits);
            }

            // Every term-vector field holds the two terms "abc" and "xyz"; only the plain "tv"
            // field is expected to return no positions enum.
            string[] termVectorFields = { "tv", "tv_pos", "tv_off", "tv_pos_off" };
            foreach (string field in termVectorFields)
            {
                Fields vectors = reader.GetTermVectors(0);
                Terms terms = vectors.Terms(field);
                Assert.IsNotNull(terms);
                Assert.AreEqual(2, terms.Size());
                TermsEnum termsEnum = terms.Iterator(null);
                Assert.AreEqual(new BytesRef("abc"), termsEnum.Next());
                DocsAndPositionsEnum positions = termsEnum.DocsAndPositions(null, null);
                if (!field.Equals("tv"))
                {
                    Assert.IsNotNull(positions);
                }
                else
                {
                    Assert.IsNull(positions);
                }
                Assert.AreEqual(new BytesRef("xyz"), termsEnum.Next());
                Assert.IsNull(termsEnum.Next());
            }

            reader.Dispose();
            directory.Dispose();
        }
        public virtual void TestBasic()
        {
            // Indexes five documents with "Author" and hierarchical "Publish Date" facets, then checks
            // browse counts, a drill-down on Publish Date/2010 and the PrintTaxonomyStats output.
            Store.Directory indexDir = NewDirectory();
            Store.Directory taxoDir  = NewDirectory();

            // Writes facet ords to a separate directory from the
            // main index:
            var taxoWriter = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE);

            FacetsConfig facetsConfig = new FacetsConfig();
            facetsConfig.SetHierarchical("Publish Date", true);

            RandomIndexWriter indexWriter = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, indexDir);

            // One entry per document, in indexing order: author and publish-date path.
            string[]   authors      = { "Bob", "Lisa", "Lisa", "Susan", "Frank" };
            string[][] publishDates =
            {
                new[] { "2010", "10", "15" },
                new[] { "2010", "10", "20" },
                new[] { "2012", "1", "1" },
                new[] { "2012", "1", "7" },
                new[] { "1999", "5", "5" }
            };
            for (int i = 0; i < authors.Length; i++)
            {
                Document document = new Document();
                document.Add(new FacetField("Author", authors[i]));
                document.Add(new FacetField("Publish Date", publishDates[i]));
                indexWriter.AddDocument(facetsConfig.Build(taxoWriter, document));
            }

            // NRT open
            IndexSearcher searcher = NewSearcher(indexWriter.GetReader());

            // NRT open
            var taxoReader = new DirectoryTaxonomyReader(taxoWriter);

            // Aggregate the facet counts:
            FacetsCollector collector = new FacetsCollector();

            // MatchAllDocsQuery is for "browsing" (counts facets
            // for all non-deleted docs in the index); normally
            // you'd use a "normal" query, and use MultiCollector to
            // wrap collecting the "normal" hits and also facets:
            searcher.Search(new MatchAllDocsQuery(), collector);

            Facets facets = new FastTaxonomyFacetCounts(taxoReader, facetsConfig, collector);

            // Retrieve & verify results:
            Assert.AreEqual("dim=Publish Date path=[] value=5 childCount=3\n  2010 (2)\n  2012 (2)\n  1999 (1)\n", facets.GetTopChildren(10, "Publish Date").ToString());
            Assert.AreEqual("dim=Author path=[] value=5 childCount=4\n  Lisa (2)\n  Bob (1)\n  Susan (1)\n  Frank (1)\n", facets.GetTopChildren(10, "Author").ToString());

            // Now user drills down on Publish Date/2010:
            DrillDownQuery drillDown = new DrillDownQuery(facetsConfig);
            drillDown.Add("Publish Date", "2010");

            collector = new FacetsCollector();
            searcher.Search(drillDown, collector);
            facets = new FastTaxonomyFacetCounts(taxoReader, facetsConfig, collector);
            Assert.AreEqual("dim=Author path=[] value=2 childCount=2\n  Bob (1)\n  Lisa (1)\n", facets.GetTopChildren(10, "Author").ToString());

            Assert.AreEqual(1, facets.GetSpecificValue("Author", "Lisa"));

            Assert.Null(facets.GetTopChildren(10, "Non exitent dim"));

            // Smoke test PrintTaxonomyStats:
            string stats;

            using (ByteArrayOutputStream buffer = new ByteArrayOutputStream())
            {
                using (StreamWriter statsWriter = new StreamWriter(buffer, Encoding.UTF8, 2048, true)
                {
                    AutoFlush = true
                })
                {
                    PrintTaxonomyStats.PrintStats(taxoReader, statsWriter, true);
                }
                stats = buffer.ToString();
            }
            Assert.True(stats.IndexOf("/Author: 4 immediate children; 5 total categories", StringComparison.Ordinal) >= 0);
            Assert.True(stats.IndexOf("/Publish Date: 3 immediate children; 12 total categories", StringComparison.Ordinal) >= 0);
            // Make sure at least a few nodes of the tree came out:
            Assert.True(stats.IndexOf("  /1999", StringComparison.Ordinal) >= 0);
            Assert.True(stats.IndexOf("  /2012", StringComparison.Ordinal) >= 0);
            Assert.True(stats.IndexOf("      /20", StringComparison.Ordinal) >= 0);

            IOUtils.Dispose(indexWriter, taxoWriter, searcher.IndexReader, taxoReader, taxoDir, indexDir);
        }
        /// <summary>
        /// Runs the query from <paramref name="criteria"/> against a reader opened on
        /// <paramref name="writer"/> and stores the joined best fragments of the full-text field on every
        /// entry of <paramref name="documents"/> whose id matches a hit.
        /// </summary>
        /// <param name="documents">Documents to decorate; they are keyed by <c>_id.ToString()</c>.</param>
        /// <param name="writer">Index writer that supplies both the reader and the analyzer.</param>
        /// <param name="criteria">Search criteria whose <c>Query</c> text is parsed for highlighting.</param>
        private static void GenerateHighlights(IList<Document> documents, IndexWriter writer, SearchCriteria criteria)
        {
            // Nothing to decorate; this also avoids asking the searcher for zero hits.
            if (documents.Count == 0)
            {
                return;
            }

            var documentsById = documents.ToDictionary(c => c._id.ToString());

            // The reader is only needed while highlighting, so release it when done.
            using (var reader = DirectoryReader.Open(writer, true, true))
            {
                var queryParser = new HighlighterQueryParser(writer.GetAnalyzer());
                queryParser.SetMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_REWRITE);

                var query = queryParser.Parse(criteria.Query)
                                       .Rewrite(reader);

                var highlighter = CreateHighlighter();
                var fieldQuery = highlighter.GetFieldQuery(query);

                var searcher = new IndexSearcher(reader);
                var topFieldDocs = searcher.Search(query, documents.Count, Sort.RELEVANCE);

                foreach (var sd in topFieldDocs.ScoreDocs)
                {
                    var bestFragments = highlighter.GetBestFragments(fieldQuery, reader, sd.Doc, Schema.StandardField.FULL_TEXT, FRAGMENT_SIZE, FRAGMENT_COUNT);
                    if (bestFragments.Length == 0)
                    {
                        continue;
                    }

                    var docId = searcher.Doc(sd.Doc).Get(Schema.StandardField.ID);

                    // A null id can never match (and would make the dictionary lookup throw).
                    Document target;
                    if (docId != null && documentsById.TryGetValue(docId, out target))
                    {
                        var highlight = String.Join($"{Environment.NewLine} ... {Environment.NewLine}", bestFragments);
                        target.AsDictionary()[HIGHLIGHT_FIELD_NAME] = highlight;
                    }
                }
            }
        }
        public virtual void TestSlowCompositeReaderWrapper()
        {
            // Counts SortedSet doc-values facets through a reader wrapped by SlowCompositeReaderWrapper;
            // the commit between the two documents is issued before the second one is added.
            AssumeTrue("Test requires SortedSetDV support", DefaultCodecSupportsSortedSet());
            Directory directory = NewDirectory();

            RandomIndexWriter indexWriter = new RandomIndexWriter(Random(), directory, Similarity, TimeZone);

            FacetsConfig facetsConfig = new FacetsConfig();

            Document first = new Document();
            first.Add(new SortedSetDocValuesFacetField("a", "foo1"));
            indexWriter.AddDocument(facetsConfig.Build(first));

            indexWriter.Commit();

            Document second = new Document();
            second.Add(new SortedSetDocValuesFacetField("a", "foo2"));
            indexWriter.AddDocument(facetsConfig.Build(second));

            // NRT open
            IndexSearcher searcher = new IndexSearcher(SlowCompositeReaderWrapper.Wrap(indexWriter.Reader));

            // Per-top-reader state:
            SortedSetDocValuesReaderState readerState = new DefaultSortedSetDocValuesReaderState(searcher.IndexReader);

            FacetsCollector collector = new FacetsCollector();
            searcher.Search(new MatchAllDocsQuery(), collector);
            Facets facets = new SortedSetDocValuesFacetCounts(readerState, collector);

            // Ask for top 10 labels for any dims that have counts:
            Assert.AreEqual("dim=a path=[] value=2 childCount=2\n  foo1 (1)\n  foo2 (1)\n", facets.GetTopChildren(10, "a").ToString());

            IOUtils.Close(indexWriter, searcher.IndexReader, directory);
        }
        /// <summary>
        ///     Searches the datasource using the specified criteria. Criteria is parsed by the query builder specified by
        ///     <typeparamref name="QueryBuilderType" />.
        /// </summary>
        /// <remarks>
        ///     The index folder is resolved from <paramref name="scope" /> and the criteria's document type. The
        ///     directory, the searcher and its reader are released before the method returns, including when
        ///     building or running the query fails.
        /// </remarks>
        /// <param name="scope">Name of the application.</param>
        /// <param name="criteria">The criteria.</param>
        /// <returns>The <c>Results</c> of the <c>LuceneSearchResults</c> built from the matching documents.</returns>
        /// <exception cref="VirtoCommerce.Search.Providers.Lucene.LuceneSearchException">
        ///     The Lucene search call threw; the original exception is available as the inner exception.
        /// </exception>
        public virtual ISearchResults Search(string scope, ISearchCriteria criteria)
        {
            var folderName = this.GetFolderName(scope, criteria.DocumentType);

            var dir = FSDirectory.Open(new DirectoryInfo(this.GetDirectoryPath(folderName)));
            try
            {
                var searcher = new IndexSearcher(dir);
                try
                {
                    var q = (Query)this.QueryBuilder.BuildQuery(criteria);

                    Debug.WriteLine("Search Lucene Query:{0}", (object)q.ToString());

                    TopDocs docs;
                    try
                    {
                        // Fetch enough hits to cover the requested page.
                        var numDocs = criteria.StartingRecord + criteria.RecordsToRetrieve;

                        if (criteria.Sort != null)
                        {
                            var fields = criteria.Sort.GetSort();

                            docs = searcher.Search(
                                q,
                                null,
                                numDocs,
                                new Sort(
                                    fields.Select(field => new SortField(field.FieldName, field.DataType, field.IsDescending))
                                          .ToArray()));
                        }
                        else
                        {
                            docs = searcher.Search(q, numDocs);
                        }
                    }
                    catch (Exception ex)
                    {
                        throw new LuceneSearchException("Search exception", ex);
                    }

                    // Build the results while the searcher and reader are still open.
                    var results = new LuceneSearchResults(searcher, searcher.IndexReader, docs, criteria, q);
                    return results.Results;
                }
                finally
                {
                    // Cleanup here
                    searcher.IndexReader.Dispose();
                    searcher.Dispose();
                }
            }
            finally
            {
                dir.Dispose();
            }
        }
        /// <summary>
        /// Searches the previously built index.
        /// </summary>
        /// <param name="forumID">Forum to restrict the search to ("gid" term); -1 disables the restriction.</param>
        /// <param name="searchText">Text to search for. Text starting with the signature prefix is passed to the
        /// query parser as-is (minus the prefix); any other text is escaped and split on spaces.
        /// Null, empty or whitespace-only text adds no text clause.</param>
        /// <param name="searchInText">Search the "message" field.</param>
        /// <param name="searchInSubject">Search the "subject" field.</param>
        /// <param name="searchAuthor">Search the "usernick" field.</param>
        /// <param name="searchInMyMessages">Restrict the search to messages whose "uid" is the configured own id.</param>
        /// <param name="searchAnyWords">When true the words are optional (OR); otherwise every word is required (AND).
        /// Only applies to text without the signature prefix.</param>
        /// <param name="from">Lower bound of the "dte" range; applied when either bound has non-zero ticks.</param>
        /// <param name="to">Upper bound of the "dte" range; applied when either bound has non-zero ticks.</param>
        /// <returns>The "mid" field values of the matching documents, limited to the configured maximum.</returns>
        private static ICollection <string> Search(
            int forumID,
            string searchText,
            bool searchInText,
            bool searchInSubject,
            bool searchAuthor,
            bool searchInMyMessages,
            bool searchAnyWords,
            DateTime from,
            DateTime to)
        {
            var result    = new List <string>();
            var query     = new BooleanQuery();
            var analyzer  = new RussianAnalyzer(Version.LUCENE_30);
            var indexPath = GetIndexDir();

            // Whitespace-only text yields no tokens, which would hand the query parser
            // an empty or "+" string; treat it as "no search text" instead.
            var searchTextExists = !string.IsNullOrEmpty(searchText) && searchText.Trim().Length > 0;

            #region Search string processing
            // The search-language signature is **
            if (searchTextExists)
            {
                if (searchText.StartsWith(_signature, StringComparison.Ordinal))
                {
                    // The user wants the query language: cut off the signature and treat the rest of the
                    // string as written in the search language
                    searchText = searchText.Substring(_signature.Length);

                    // A bare signature leaves nothing to parse
                    searchTextExists = searchText.Trim().Length > 0;
                }
                else
                {
                    // Simple search: escape the special characters, split into tokens (space is the separator)
                    // and honor the searchAnyWords flag (AND/OR)
                    // Order matters: the backslash must be escaped first
                    var specChars = new[] { "\\", "+", "-", "&", "|", "!", "(", ")", "{", "}", "[", "]", "^", "\"", "~", "*", "?", ":" };
                    searchText =
                        specChars
                        .Aggregate(
                            searchText,
                            (current, specChar) => current.Replace(specChar, "\\" + specChar));
                    var token = searchText.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);

                    if (searchAnyWords)
                    {
                        searchText = string.Join(" ", token);
                    }
                    else
                    {
                        searchText = "+" + string.Join(" +", token);
                    }
                }
            }
            #endregion

            if (forumID != -1)
            {
                query.Add(
                    new TermQuery(new Term("gid", forumID.ToString())),
                    Occur.MUST);
            }

            if (searchInMyMessages)
            {
                query.Add(
                    new TermQuery(new Term("uid", Config.Instance.SelfId.ToString())),
                    Occur.MUST);
            }

            //if (searchInQuestions)
            //  bq.Add(new TermQuery(new Term("tid", "0")), true, false);

            if (from.Ticks != 0 || to.Ticks != 0)
            {
                var rq = new TermRangeQuery("dte", FormatDate(from), FormatDate(to), true, true);
                query.Add(rq, Occur.MUST);
            }

            if (searchTextExists)
            {
                // The text must match in at least one of the selected fields
                var searchTextQuery = new BooleanQuery();
                if (searchInText)
                {
                    searchTextQuery.Add(
                        new QueryParser(Version.LUCENE_29, "message", analyzer).Parse(searchText),
                        Occur.SHOULD);
                }
                if (searchInSubject)
                {
                    searchTextQuery.Add(
                        new QueryParser(Version.LUCENE_29, "subject", analyzer).Parse(searchText),
                        Occur.SHOULD);
                }
                if (searchAuthor)
                {
                    searchTextQuery.Add(
                        new QueryParser(Version.LUCENE_29, "usernick", analyzer).Parse(searchText),
                        Occur.SHOULD);
                }
                query.Add(searchTextQuery, Occur.MUST);
            }

            var searcher = new IndexSearcher(indexPath, true);
            try
            {
                var topDocs = searcher.Search(query, _maxSearchReults);
                result
                .AddRange(
                    topDocs
                    .ScoreDocs
                    .Select(scored => searcher.Doc(scored.Doc).Get("mid")));
            }
            finally
            {
                searcher.Close();
            }

            return(result);
        }
        /// <summary>
        /// Verifies that a category label of (almost) the maximum allowed path length
        /// keeps its ordinal after the taxonomy writer cache is forced to re-hash, and
        /// that a drill-down query on that label finds the single matching document.
        /// </summary>
        public virtual void TestHugeLabel()
        {
            Directory indexDir = NewDirectory();
            Directory taxoDir = NewDirectory();

            var analyzer = new MockAnalyzer(Random());
            var indexWriter = new IndexWriter(indexDir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer));

            var writerCache = new Cl2oTaxonomyWriterCache(2, 1f, 1);
            var taxoWriter = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE, writerCache);
            var config = new FacetsConfig();

            // Add one huge label; 4 characters are reserved for the dimension and separator.
            int labelLength = FacetLabel.MAX_CATEGORY_PATH_LENGTH - 4;
            string hugeLabel = TestUtil.RandomSimpleString(Random(), labelLength, labelLength);
            var hugeField = new FacetField("dim", hugeLabel);
            var hugePath = new FacetLabel("dim", hugeLabel);
            int expectedOrdinal = taxoWriter.AddCategory(hugePath);

            var document = new Document();
            document.Add(hugeField);
            indexWriter.AddDocument(config.Build(taxoWriter, document));

            // Add a few tiny labels to cause a re-hash.
            for (int added = 0; added < 3; added++)
            {
                string tinyLabel = TestUtil.RandomSimpleString(Random(), 1, 10);
                taxoWriter.AddCategory(new FacetLabel("dim", tinyLabel));

                document = new Document();
                document.Add(new FacetField("dim", tinyLabel));
                indexWriter.AddDocument(config.Build(taxoWriter, document));
            }

            // When too-large components were allowed to be added, re-adding the
            // huge label produced a brand new category instead of the existing ordinal.
            int ordinalAfterRehash = taxoWriter.AddCategory(hugePath);
            Assert.AreEqual(expectedOrdinal, ordinalAfterRehash);

            IOUtils.Close(indexWriter, taxoWriter);

            DirectoryReader indexReader = DirectoryReader.Open(indexDir);
            var taxoReader = new DirectoryTaxonomyReader(taxoDir);
            var searcher = new IndexSearcher(indexReader);

            var drillDown = new DrillDownQuery(new FacetsConfig());
            drillDown.Add("dim", hugeLabel);

            var hits = searcher.Search(drillDown, 10);
            Assert.AreEqual(1, hits.TotalHits);

            IOUtils.Close(indexReader, taxoReader, indexDir, taxoDir);
        }
Beispiel #45
0
        /// <summary>
        /// Searches the specified phrase in the specified search fields.
        /// </summary>
        /// <param name="searchFields">The search fields.</param>
        /// <param name="phrase">The phrase to search.</param>
        /// <param name="searchOption">The search options.</param>
        /// <returns>
        /// A list of <see cref="SearchResult"/> items (at most 100). The list is empty when
        /// <paramref name="phrase"/> is null, empty, or cannot be parsed into a query.
        /// </returns>
        public static List<SearchResult> Search(SearchField[] searchFields, string phrase, SearchOptions searchOption)
        {
            // Upper bound on the number of hits fetched from the index and returned.
            const int maxResults = 100;

            // Nothing to search for: answer the same way as for an unparsable phrase
            // instead of letting the query parser fail on a null input.
            if (string.IsNullOrEmpty(phrase))
            {
                return new List<SearchResult>(0);
            }

            IIndexDirectoryProviderV30 indexDirectoryProvider = Collectors.IndexDirectoryProvider;
            Analyzer analyzer = new SimpleAnalyzer();

            using (IndexSearcher searcher = new IndexSearcher(indexDirectoryProvider.GetDirectory(), false))
            {
                string[] searchFieldsAsString = (from f in searchFields select f.AsString()).ToArray();
                MultiFieldQueryParser queryParser = new MultiFieldQueryParser(Lucene.Net.Util.Version.LUCENE_29, searchFieldsAsString, analyzer);

                if (searchOption == SearchOptions.AllWords)
                {
                    queryParser.DefaultOperator = QueryParser.Operator.AND;
                }

                if (searchOption == SearchOptions.AtLeastOneWord)
                {
                    queryParser.DefaultOperator = QueryParser.Operator.OR;
                }

                try
                {
                    Query query = queryParser.Parse(phrase);
                    TopDocs topDocs = searcher.Search(query, maxResults);

                    Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter("<b class=\"searchkeyword\">", "</b>"), new QueryScorer(query));

                    // Size the list by the hits actually fetched. TotalHits counts every
                    // match in the index and can be far larger than what is returned here.
                    var hits = topDocs.ScoreDocs;
                    List<SearchResult> searchResults = new List<SearchResult>(hits.Length);

                    foreach (var hit in hits)
                    {
                        Document doc = searcher.Doc(hit.Doc);

                        SearchResult result = new SearchResult();
                        result.DocumentType = DocumentTypeFromString(doc.GetField(SearchField.DocumentType.AsString()).StringValue);
                        result.Relevance = hit.Score * 100;

                        switch (result.DocumentType)
                        {
                            case DocumentType.Page:
                                PageDocument page = new PageDocument();
                                page.PageFullName = doc.GetField(SearchField.PageFullName.AsString()).StringValue;
                                page.Title = doc.GetField(SearchField.Title.AsString()).StringValue;

                                TokenStream titleStream = analyzer.TokenStream(SearchField.Title.AsString(), new StringReader(page.Title));
                                page.HighlightedTitle = highlighter.GetBestFragments(titleStream, page.Title, 3, " [...] ");

                                page.Content = doc.GetField(SearchField.Content.AsString()).StringValue;

                                TokenStream contentStream = analyzer.TokenStream(SearchField.Content.AsString(), new StringReader(page.Content));
                                page.HighlightedContent = highlighter.GetBestFragments(contentStream, page.Content, 3, " [...] ");

                                result.Document = page;
                                break;

                            case DocumentType.Message:
                                MessageDocument message = new MessageDocument();
                                message.PageFullName = doc.GetField(SearchField.PageFullName.AsString()).StringValue;
                                message.DateTime = DateTime.Parse(doc.GetField(SearchField.MessageDateTime.AsString()).StringValue);
                                message.Subject = doc.GetField(SearchField.Title.AsString()).StringValue;
                                message.Body = doc.GetField(SearchField.Content.AsString()).StringValue;

                                TokenStream bodyStream = analyzer.TokenStream(SearchField.Content.AsString(), new StringReader(message.Body));
                                message.HighlightedBody = highlighter.GetBestFragments(bodyStream, message.Body, 3, " [...] ");

                                result.Document = message;
                                break;

                            case DocumentType.Attachment:
                                PageAttachmentDocument attachment = new PageAttachmentDocument();
                                attachment.PageFullName = doc.GetField(SearchField.PageFullName.AsString()).StringValue;
                                attachment.FileName = doc.GetField(SearchField.FileName.AsString()).StringValue;
                                attachment.FileContent = doc.GetField(SearchField.FileContent.AsString()).StringValue;

                                TokenStream attachmentStream = analyzer.TokenStream(SearchField.Content.AsString(), new StringReader(attachment.FileContent));
                                attachment.HighlightedFileContent = highlighter.GetBestFragments(attachmentStream, attachment.FileContent, 3, " [...] ");

                                result.Document = attachment;
                                break;

                            // Plain files and source-control files are materialized identically.
                            case DocumentType.File:
                            case DocumentType.SourceControlFile:
                                FileDocument file = new FileDocument();
                                file.FileName = doc.GetField(SearchField.FileName.AsString()).StringValue;
                                file.FileContent = doc.GetField(SearchField.FileContent.AsString()).StringValue;

                                TokenStream fileStream = analyzer.TokenStream(SearchField.Content.AsString(), new StringReader(file.FileContent));
                                file.HighlightedFileContent = highlighter.GetBestFragments(fileStream, file.FileContent, 3, " [...]");

                                result.Document = file;
                                break;
                        }

                        searchResults.Add(result);
                    }

                    return searchResults;
                }
                catch (ParseException)
                {
                    // The phrase is not valid query syntax: report "no results" rather than failing.
                    return new List<SearchResult>(0);
                }
            }
        }
Beispiel #46
0
        /// <summary>
        /// Indexes a single document and checks that a <c>TermRangeFilter</c> over the
        /// "content" field excludes it for the first range and includes it for the second.
        /// </summary>
        /// <param name="analyzer">Analyzer used to index the document.</param>
        /// <param name="firstBeg">Lower bound of the range expected to exclude the document.</param>
        /// <param name="firstEnd">Upper bound of the range expected to exclude the document.</param>
        /// <param name="secondBeg">Lower bound of the range expected to include the document.</param>
        /// <param name="secondEnd">Upper bound of the range expected to include the document.</param>
        public virtual void TestFarsiRangeFilterCollating(Analyzer analyzer, BytesRef firstBeg, BytesRef firstEnd, BytesRef secondBeg, BytesRef secondEnd)
        {
            Directory directory = NewDirectory();

            var writerConfig = new IndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
            IndexWriter indexWriter = new IndexWriter(directory, writerConfig);

            Document document = new Document();
            document.Add(new TextField("content", "\u0633\u0627\u0628", Field.Store.YES));
            document.Add(new StringField("body", "body", Field.Store.YES));
            indexWriter.AddDocument(document);
            indexWriter.Dispose();

            IndexReader indexReader = DirectoryReader.Open(directory);
            IndexSearcher indexSearcher = new IndexSearcher(indexReader);
            Query bodyQuery = new TermQuery(new Term("body", "body"));

            // Unicode order would include U+0633 in [ U+062F - U+0698 ], but Farsi
            // orders the U+0698 character before the U+0633 character, so the single
            // indexed term must NOT be returned by a TermRangeFilter with a Farsi
            // collator (or an Arabic one when Farsi is not supported).
            var excludingRange = new TermRangeFilter("content", firstBeg, firstEnd, true, true);
            ScoreDoc[] matches = indexSearcher.Search(bodyQuery, excludingRange, 1).ScoreDocs;
            Assert.AreEqual(0, matches.Length, "The index Term should not be included.");

            var includingRange = new TermRangeFilter("content", secondBeg, secondEnd, true, true);
            matches = indexSearcher.Search(bodyQuery, includingRange, 1).ScoreDocs;
            Assert.AreEqual(1, matches.Length, "The index Term should be included.");

            indexReader.Dispose();
            directory.Dispose();
        }
        /// <summary>
        /// Executes an <c>intersect()</c> query: the first intersect clause is run as the
        /// sorted "base" query and the remaining clauses are fed through an
        /// <c>IntersectionCollector</c>; documents reported by the collector for all
        /// clauses are then yielded page by page.
        /// </summary>
        /// <remarks>
        /// This is an iterator method, so the validation below (and any
        /// <c>InvalidQueryException</c>) only happens once the result is enumerated.
        /// </remarks>
        /// <param name="query">The query; its WHERE clause must be a single <c>intersect(...)</c> call with at least two arguments.</param>
        /// <param name="fieldsToFetch">Passed to the <c>IndexQueryingScope</c> created for this query.</param>
        /// <param name="totalResults">Receives <c>TotalHits</c> of the base query (i.e. before the intersection is applied).</param>
        /// <param name="skippedResults">Receives the number of results counted as skipped.</param>
        /// <param name="retriever">Used to read the key of, and materialize, each Lucene document.</param>
        /// <param name="documentsContext">Passed to query building and sort creation.</param>
        /// <param name="getSpatialField">Passed to <c>GetSort</c>.</param>
        /// <param name="token">Checked at the start of every iteration of the retry loop.</param>
        /// <returns>A lazy sequence of results; <c>Highlightings</c> and <c>Explanation</c> are always null.</returns>
        public IEnumerable <(Document Result, Dictionary <string, Dictionary <string, string[]> > Highlightings, ExplanationResult Explanation)> IntersectQuery(IndexQueryServerSide query, FieldsToFetch fieldsToFetch, Reference <int> totalResults, Reference <int> skippedResults, IQueryResultRetriever retriever, DocumentsOperationContext documentsContext, Func <string, SpatialField> getSpatialField, CancellationToken token)
        {
            // The WHERE clause must be a method call expression; anything else is rejected.
            var method = query.Metadata.Query.Where as MethodExpression;

            if (method == null)
            {
                throw new InvalidQueryException($"Invalid intersect query. WHERE clause must contains just an intersect() method call while it got {query.Metadata.Query.Where.Type} expression", query.Metadata.QueryText, query.QueryParameters);
            }

            var methodName = method.Name;

            // Ordinal, case-sensitive comparison: only the exact name "intersect" is accepted.
            if (string.Equals("intersect", methodName) == false)
            {
                throw new InvalidQueryException($"Invalid intersect query. WHERE clause must contains just a single intersect() method call while it got '{methodName}' method", query.Metadata.QueryText, query.QueryParameters);
            }

            // An intersection needs at least two clauses.
            if (method.Arguments.Count <= 1)
            {
                throw new InvalidQueryException("The valid intersect query must have multiple intersect clauses.", query.Metadata.QueryText, query.QueryParameters);
            }

            // Translate every intersect argument into its own Lucene query.
            var subQueries = new Query[method.Arguments.Count];

            for (var i = 0; i < subQueries.Length; i++)
            {
                var whereExpression = method.Arguments[i] as QueryExpression;

                if (whereExpression == null)
                {
                    throw new InvalidQueryException($"Invalid intersect query. The intersect clause at position {i} isn't a valid expression", query.Metadata.QueryText, query.QueryParameters);
                }

                subQueries[i] = GetLuceneQuery(documentsContext, query.Metadata, whereExpression, query.QueryParameters, _analyzer, _queryBuilderFactories);
            }

            //Not sure how to select the page size here??? The problem is that only docs in this search can be part
            //of the final result because we're doing an intersection query (but we might exclude some of them)
            // The initial guess for the base query is twice (start + page size).
            var pageSize                    = GetPageSize(_searcher, query.PageSize);
            int pageSizeBestGuess           = GetPageSize(_searcher, ((long)query.Start + query.PageSize) * 2);
            int skippedResultsInCurrentLoop = 0;
            int previousBaseQueryMatches    = 0;

            var firstSubDocumentQuery = subQueries[0];
            var sort = GetSort(query, _index, getSpatialField, documentsContext);

            using (var scope = new IndexQueryingScope(_indexType, query, fieldsToFetch, _searcher, retriever, _state))
            {
                //Do the first sub-query in the normal way, so that sorting, filtering etc is accounted for
                var search = ExecuteQuery(firstSubDocumentQuery, 0, pageSizeBestGuess, sort);
                var currentBaseQueryMatches = search.ScoreDocs.Length;
                var intersectionCollector   = new IntersectionCollector(_searcher, search.ScoreDocs, _state);

                int intersectMatches;
                do
                {
                    token.ThrowIfCancellationRequested();
                    // Zero on the first pass, so the base query is only re-executed on retries.
                    if (skippedResultsInCurrentLoop > 0)
                    {
                        // We get here because out first attempt didn't get enough docs (after INTERSECTION was calculated)
                        pageSizeBestGuess = pageSizeBestGuess * 2;

                        // Re-run the base query with the doubled size and start over with a fresh collector.
                        search = ExecuteQuery(firstSubDocumentQuery, 0, pageSizeBestGuess, sort);
                        previousBaseQueryMatches = currentBaseQueryMatches;
                        currentBaseQueryMatches  = search.ScoreDocs.Length;
                        intersectionCollector    = new IntersectionCollector(_searcher, search.ScoreDocs, _state);
                    }

                    // Run every remaining clause through the collector seeded with the base query's hits.
                    for (var i = 1; i < subQueries.Length; i++)
                    {
                        _searcher.Search(subQueries[i], null, intersectionCollector, _state);
                    }

                    // NOTE(review): presumably returns the documents seen for all subQueries.Length clauses - confirm in IntersectionCollector.
                    var currentIntersectResults = intersectionCollector.DocumentsIdsForCount(subQueries.Length).ToList();
                    intersectMatches            = currentIntersectResults.Count;
                    skippedResultsInCurrentLoop = pageSizeBestGuess - intersectMatches;
                } while (intersectMatches < pageSize &&                       //stop if we've got enough results to satisfy the pageSize
                         currentBaseQueryMatches < search.TotalHits &&        //stop if increasing the page size wouldn't make any difference
                         previousBaseQueryMatches < currentBaseQueryMatches); //stop if increasing the page size didn't result in any more "base query" results

                var intersectResults = intersectionCollector.DocumentsIdsForCount(subQueries.Length).ToList();
                //It's hard to know what to do here, the TotalHits from the base search isn't really the TotalSize,
                //because it's before the INTERSECTION has been applied, so only some of those results make it out.
                //Trying to give an accurate answer is going to be too costly, so we aren't going to try.
                totalResults.Value   = search.TotalHits;
                skippedResults.Value = skippedResultsInCurrentLoop;

                //Using the final set of results in the intersectionCollector
                // Paging starts at query.Start and stops after pageSize results have been yielded.
                int returnedResults = 0;
                for (int i = query.Start; i < intersectResults.Count && (i - query.Start) < pageSizeBestGuess; i++)
                {
                    var indexResult = intersectResults[i];
                    var document    = _searcher.Doc(indexResult.LuceneId, _state);

                    // Skip early, before materializing, when the scope reports the key will not be included.
                    if (retriever.TryGetKey(document, _state, out string key) && scope.WillProbablyIncludeInResults(key) == false)
                    {
                        skippedResults.Value++;
                        skippedResultsInCurrentLoop++;
                        continue;
                    }

                    // Materialize the result; the scope may still reject it.
                    var result = retriever.Get(document, indexResult.Score, _state);
                    if (scope.TryIncludeInResults(result) == false)
                    {
                        skippedResults.Value++;
                        skippedResultsInCurrentLoop++;
                        continue;
                    }

                    returnedResults++;

                    // Highlightings and explanations are not produced for intersect queries.
                    yield return(result, null, null);

                    if (returnedResults == pageSize)
                    {
                        yield break;
                    }
                }
            }
        }
Beispiel #48
0
 /// <summary>
 /// Returns the total number of hits for a single-term query on the "text" field
 /// (helper for the LUCENE-1404 test).
 /// </summary>
 /// <param name="searcher">Searcher to run the query against.</param>
 /// <param name="word">Term text to look up in the "text" field.</param>
 /// <returns>The <c>TotalHits</c> reported for the query.</returns>
 private int HitCount(IndexSearcher searcher, string word)
 {
     var textTerm = new Term("text", word);
     var topDocs = searcher.Search(new TermQuery(textTerm), 10);
     return topDocs.TotalHits;
 }
Beispiel #49
0
		/// <summary>
		/// Tryout entry point: opens the CSV data file and its pre-built index file,
		/// then runs the same lookup ten times, printing every matching line and the
		/// elapsed time of each run.
		/// </summary>
		/// <param name="args">Command-line arguments (unused).</param>
		static void Main(string[] args)
		{
			// NOTE(review): 'options' is not used below; the construction is kept in case
			// the constructor has side effects - confirm before removing.
			var options = new DirectoryExternalStorageOptions("indexing");

			// Earlier commented-out experiments (external sort run, raw index dump)
			// were removed; only the search benchmark remains.

			// Both streams are disposed deterministically once the benchmark finishes,
			// including when opening the second file or searching throws.
			using (var input = File.OpenRead(@"C:\Users\Ayende\Downloads\Crimes_-_2001_to_present.csv"))
			using (var index = File.OpenRead(@"C:\work\ExternalSorting\ExternalSorting.Tryouts\bin\Debug\indexing\0.index"))
			{
				var searcher = new IndexSearcher(input, index, Encoding.UTF8);

				for (int i = 0; i < 10; i++)
				{
					var sp = Stopwatch.StartNew();
					foreach (var line in searcher.Search(@"HT574031"))
					{
						Console.WriteLine(line);
					}
					Console.WriteLine(sp.Elapsed);
				}
			}
		}