/// <summary>
/// Initializes a new instance of the <see cref="LuceneSearchResults" /> class.
/// </summary>
/// <param name="searcher">The searcher.</param>
/// <param name="reader">The reader.</param>
/// <param name="docs">The hits.</param>
/// <param name="criteria">The criteria.</param>
/// <param name="query">The query.</param>
public LuceneSearchResults(Searcher searcher, IndexReader reader, TopDocs docs, ISearchCriteria criteria, Query query)
{
    Results = new SearchResults(criteria, null);
    CreateDocuments(searcher, docs);
    CreateFacets(reader, query);
    CreateSuggestions(reader, criteria);
}
/// <summary>
/// Populates the SearchResult object with data from the specified TopDocs object.
/// </summary>
/// <param name="result">The SearchResult to be populated.</param>
/// <param name="topDocs">The TopDocs object returned by Lucene.</param>
/// <param name="categories">The categories.</param>
/// <param name="getDoc">A lambda that returns the Lucene document given the doc id.</param>
/// <exception cref="ArgumentNullException">
/// Thrown when <paramref name="result"/>, <paramref name="topDocs"/>, or <paramref name="getDoc"/> is null.
/// </exception>
public static void PopulateWith(this SearchResult<Guid> result, TopDocs topDocs, IEnumerable<Category> categories, Func<int, LuceneDocument> getDoc)
{
    if (result == null) throw new ArgumentNullException(nameof(result));
    if (topDocs == null) throw new ArgumentNullException(nameof(topDocs));
    if (getDoc == null) throw new ArgumentNullException(nameof(getDoc));

    result.ItemCount = topDocs.ScoreDocs.Length;
    result.TotalHits = topDocs.TotalHits;

    if (result.ItemCount > 0)
    {
        var itemsToSkip = (result.PageNumber - 1) * result.ItemsPerPage;
        var itemsToTake = result.ItemsPerPage;

        var scoreDocs = topDocs.ScoreDocs
            .Skip(itemsToSkip)
            .Take(itemsToTake)
            .ToList();

        var documentIds = new List<Guid>();
        for (var i = 0; i < scoreDocs.Count; i++)
        {
            var sd = scoreDocs[i];
            var doc = getDoc(sd.Doc);
            if (doc == null)
                continue;

            // Skip documents that lack an ID field instead of throwing.
            var idField = doc.GetField(Schema.StandardField.ID);
            if (idField == null)
                continue;

            documentIds.Add(Guid.Parse(idField.StringValue()));
        }

        result.Items = documentIds;
        result.Categories = categories ?? Enumerable.Empty<Category>();
        result.PageCount = ComputePageCount(result.ItemCount, result.ItemsPerPage);
    }
}
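A minimal call-site sketch for the extension above; the searcher, query, and paging values are illustrative assumptions, and getDoc simply delegates to the searcher that produced the hits (LuceneDocument is taken to be an alias for Lucene's Document type):

// Hypothetical usage (names assumed, not from the original source).
var result = new SearchResult<Guid> { PageNumber = 1, ItemsPerPage = 20 };
TopDocs topDocs = searcher.Search(query, 1000);
// Resolve each hit's doc id back through the same searcher.
result.PopulateWith(topDocs, categories: null, getDoc: docId => searcher.Doc(docId));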
public void DoStandardHighlights(Analyzer analyzer, IndexSearcher searcher, TopDocs hits, Query query, IFormatter formatter)
{
    DoStandardHighlights(analyzer, searcher, hits, query, formatter, false);
}
public object SearchPrevIndex(Dictionary<string, string> dic, int pageIndex, int pageSize, LibHandle handle, string lastFileId)
{
    BooleanQuery bQuery = new BooleanQuery();
    foreach (var item in dic)
    {
        QueryParser parse = new QueryParser(Lucene.Net.Util.Version.LUCENE_29, item.Key, PanGuAnalyzer);
        // Set the default operator before parsing so it actually applies to the query.
        parse.SetDefaultOperator(QueryParser.Operator.AND);
        Query query = parse.Parse(GetKeyWordsSplitBySpace(item.Value));
        bQuery.Add(query, BooleanClause.Occur.MUST);
    }

    IndexSearcher search = new IndexSearcher(FSDirectory.Open(new DirectoryInfo(IndexDic)), true);
    Stopwatch stopwatch = Stopwatch.StartNew();

    // The third SortField constructor argument selects the direction:
    // true = descending, false = ascending.
    Sort sort = new Sort(new SortField("fileId", SortField.STRING, true));
    TopDocs docs = search.Search(bQuery, null, 1000, sort);
    stopwatch.Stop();

    SearchResult doclist = new SearchResult();
    if (docs != null && docs.totalHits > 0)
    {
        doclist.SearchTime = stopwatch.ElapsedMilliseconds;
        doclist.TotalHits = docs.totalHits;

        List<AbstractFileBase> fileList = new List<AbstractFileBase>();
        List<string> docIds = new List<string>();
        Queue docsQueue = new Queue();

        // Only the top 1000 hits were collected, so iterate over the returned
        // ScoreDocs rather than totalHits, which can be larger.
        int hitCount = Math.Min(docs.totalHits, docs.scoreDocs.Length);
        for (int i = (pageIndex - 1) * pageSize; i < hitCount; i++)
        {
            Document doc = search.Doc(docs.scoreDocs[i].doc);
            string fileId = doc.Get("fileId");
            docIds.Add(fileId);
            fileList.Add(new TextFileInfo()
            {
                FileId = fileId,
                Content = doc.Get("content"),
            });

            #region Every 100 iterations, or on the last iteration
            if (((i - (pageIndex - 1) * pageSize) % 100 == 0 && i != 0) || i == hitCount - 1)
            {
                List<string> allowDocIds = DMPermissionControl.Default.FilterDocIds(handle, DMFuncPermissionEnum.Read, docIds);
                foreach (AbstractFileBase file in fileList)
                {
                    if (allowDocIds.Contains(file.FileId))
                    {
                        if (string.Compare(file.FileId, lastFileId) <= 0)
                        {
                            foreach (object o in docsQueue.ToArray())
                            {
                                doclist.Docs.Add((AbstractFileBase)o);
                            }
                            return doclist;
                        }
                        else
                        {
                            if (docsQueue.Count >= pageSize)
                            {
                                docsQueue.Dequeue();
                            }
                            docsQueue.Enqueue(file);
                        }
                    }
                }
                docIds.Clear();
                fileList.Clear();
            }
            #endregion
        }
    }
    return doclist;
}
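Scanning from the first hit on every call makes deep pages progressively more expensive. If the index were on Lucene.NET 4.8 rather than the 2.9 API above, IndexSearcher.SearchAfter could resume from the previous page's last hit instead; a rough sketch under that assumption, with names chosen for illustration:

// Sketch, assuming Lucene.NET 4.8 (the snippet above targets the 2.9 API).
// 'previousPageLast' is the last ScoreDoc of the previous page (null for page 1);
// when a Sort is used it must be the FieldDoc returned by the earlier search.
Sort sort = new Sort(new SortField("fileId", SortFieldType.STRING, true));
TopDocs page = previousPageLast == null
    ? searcher.Search(query, pageSize, sort)
    : searcher.SearchAfter(previousPageLast, query, pageSize, sort);
foreach (ScoreDoc sd in page.ScoreDocs)
{
    Document doc = searcher.Doc(sd.Doc); // project stored fields as needed
}
ScoreDoc nextCursor = page.ScoreDocs.Length > 0 ? page.ScoreDocs[page.ScoreDocs.Length - 1] : null;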
public void TestMultiSearcher()
{
    // setup index 1
    RAMDirectory ramDir1 = new RAMDirectory();
    IndexWriter writer1 = new IndexWriter(ramDir1, new StandardAnalyzer(TEST_VERSION), true, IndexWriter.MaxFieldLength.UNLIMITED);
    Document d = new Document();
    Field f = new Field(FIELD_NAME, "multiOne", Field.Store.YES, Field.Index.ANALYZED);
    d.Add(f);
    writer1.AddDocument(d);
    writer1.Optimize();
    writer1.Close();
    IndexReader reader1 = IndexReader.Open(ramDir1, true);

    // setup index 2
    RAMDirectory ramDir2 = new RAMDirectory();
    IndexWriter writer2 = new IndexWriter(ramDir2, new StandardAnalyzer(TEST_VERSION), true, IndexWriter.MaxFieldLength.UNLIMITED);
    d = new Document();
    f = new Field(FIELD_NAME, "multiTwo", Field.Store.YES, Field.Index.ANALYZED);
    d.Add(f);
    writer2.AddDocument(d);
    writer2.Optimize();
    writer2.Close();
    IndexReader reader2 = IndexReader.Open(ramDir2, true);

    var searchers = new IndexSearcher[2];
    searchers[0] = new IndexSearcher(ramDir1, true);
    searchers[1] = new IndexSearcher(ramDir2, true);
    MultiSearcher multiSearcher = new MultiSearcher(searchers);
    QueryParser parser = new QueryParser(TEST_VERSION, FIELD_NAME, new StandardAnalyzer(TEST_VERSION));
    parser.MultiTermRewriteMethod = MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE;
    query = parser.Parse("multi*");
    Console.WriteLine("Searching for: " + query.ToString(FIELD_NAME));
    // at this point the multisearcher calls combine(query[])
    hits = multiSearcher.Search(query, null, 1000);

    // query = QueryParser.Parse("multi*", FIELD_NAME, new StandardAnalyzer(TEST_VERSION));
    Query[] expandedQueries = new Query[2];
    expandedQueries[0] = query.Rewrite(reader1);
    expandedQueries[1] = query.Rewrite(reader2);
    query = query.Combine(expandedQueries);

    // create an instance of the highlighter with the tags used to surround
    // highlighted text
    Highlighter highlighter = new Highlighter(this, new QueryTermScorer(query));

    for (int i = 0; i < hits.TotalHits; i++)
    {
        String text = multiSearcher.Doc(hits.ScoreDocs[i].Doc).Get(FIELD_NAME);
        TokenStream tokenStream = analyzer.TokenStream(FIELD_NAME, new StringReader(text));
        String highlightedText = highlighter.GetBestFragment(tokenStream, text);
        Console.WriteLine(highlightedText);
    }
    Assert.IsTrue(numHighlights == 2, "Failed to find correct number of highlights " + numHighlights + " found");
}
public void TestSimpleWithScoring()
{
    const string idField = "id";
    const string toField = "movieId";

    Directory dir = NewDirectory();
    RandomIndexWriter w = new RandomIndexWriter(Random(), dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NewLogMergePolicy()));

    // 0
    Document doc = new Document();
    doc.Add(new TextField("description", "A random movie", Field.Store.NO));
    doc.Add(new TextField("name", "Movie 1", Field.Store.NO));
    doc.Add(new TextField(idField, "1", Field.Store.NO));
    w.AddDocument(doc);

    // 1
    doc = new Document();
    doc.Add(new TextField("subtitle", "The first subtitle of this movie", Field.Store.NO));
    doc.Add(new TextField(idField, "2", Field.Store.NO));
    doc.Add(new TextField(toField, "1", Field.Store.NO));
    w.AddDocument(doc);

    // 2
    doc = new Document();
    doc.Add(new TextField("subtitle", "random subtitle; random event movie", Field.Store.NO));
    doc.Add(new TextField(idField, "3", Field.Store.NO));
    doc.Add(new TextField(toField, "1", Field.Store.NO));
    w.AddDocument(doc);

    // 3
    doc = new Document();
    doc.Add(new TextField("description", "A second random movie", Field.Store.NO));
    doc.Add(new TextField("name", "Movie 2", Field.Store.NO));
    doc.Add(new TextField(idField, "4", Field.Store.NO));
    w.AddDocument(doc);
    w.Commit();

    // 4
    doc = new Document();
    doc.Add(new TextField("subtitle", "a very random event happened during christmas night", Field.Store.NO));
    doc.Add(new TextField(idField, "5", Field.Store.NO));
    doc.Add(new TextField(toField, "4", Field.Store.NO));
    w.AddDocument(doc);

    // 5
    doc = new Document();
    doc.Add(new TextField("subtitle", "movie end movie test 123 test 123 random", Field.Store.NO));
    doc.Add(new TextField(idField, "6", Field.Store.NO));
    doc.Add(new TextField(toField, "4", Field.Store.NO));
    w.AddDocument(doc);

    IndexSearcher indexSearcher = new IndexSearcher(w.Reader);
    w.Dispose();

    // Search for movie via subtitle
    Query joinQuery = JoinUtil.CreateJoinQuery(toField, false, idField, new TermQuery(new Term("subtitle", "random")), indexSearcher, ScoreMode.Max);
    TopDocs result = indexSearcher.Search(joinQuery, 10);
    assertEquals(2, result.TotalHits);
    assertEquals(0, result.ScoreDocs[0].Doc);
    assertEquals(3, result.ScoreDocs[1].Doc);

    // Score mode max.
    joinQuery = JoinUtil.CreateJoinQuery(toField, false, idField, new TermQuery(new Term("subtitle", "movie")), indexSearcher, ScoreMode.Max);
    result = indexSearcher.Search(joinQuery, 10);
    assertEquals(2, result.TotalHits);
    assertEquals(3, result.ScoreDocs[0].Doc);
    assertEquals(0, result.ScoreDocs[1].Doc);

    // Score mode total
    joinQuery = JoinUtil.CreateJoinQuery(toField, false, idField, new TermQuery(new Term("subtitle", "movie")), indexSearcher, ScoreMode.Total);
    result = indexSearcher.Search(joinQuery, 10);
    assertEquals(2, result.TotalHits);
    assertEquals(0, result.ScoreDocs[0].Doc);
    assertEquals(3, result.ScoreDocs[1].Doc);

    // Score mode avg
    joinQuery = JoinUtil.CreateJoinQuery(toField, false, idField, new TermQuery(new Term("subtitle", "movie")), indexSearcher, ScoreMode.Avg);
    result = indexSearcher.Search(joinQuery, 10);
    assertEquals(2, result.TotalHits);
    assertEquals(3, result.ScoreDocs[0].Doc);
    assertEquals(0, result.ScoreDocs[1].Doc);

    indexSearcher.IndexReader.Dispose();
    dir.Dispose();
}
private void ExecuteRandomJoin(bool multipleValuesPerDocument, int maxIndexIter, int maxSearchIter, int numberOfDocumentsToIndex)
{
    for (int indexIter = 1; indexIter <= maxIndexIter; indexIter++)
    {
        if (VERBOSE)
        {
            Console.WriteLine("indexIter=" + indexIter);
        }
        Directory dir = NewDirectory();
        RandomIndexWriter w = new RandomIndexWriter(Random(), dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random(), MockTokenizer.KEYWORD, false)).SetMergePolicy(NewLogMergePolicy()));
        bool scoreDocsInOrder = TestJoinUtil.Random().NextBoolean();
        IndexIterationContext context = CreateContext(numberOfDocumentsToIndex, w, multipleValuesPerDocument, scoreDocsInOrder);

        IndexReader topLevelReader = w.Reader;
        w.Dispose();
        for (int searchIter = 1; searchIter <= maxSearchIter; searchIter++)
        {
            if (VERBOSE)
            {
                Console.WriteLine("searchIter=" + searchIter);
            }
            IndexSearcher indexSearcher = NewSearcher(topLevelReader);

            int r = Random().Next(context.RandomUniqueValues.Length);
            bool from = context.RandomFrom[r];
            string randomValue = context.RandomUniqueValues[r];
            FixedBitSet expectedResult = CreateExpectedResult(randomValue, from, indexSearcher.IndexReader, context);

            Query actualQuery = new TermQuery(new Term("value", randomValue));
            if (VERBOSE)
            {
                Console.WriteLine("actualQuery=" + actualQuery);
            }
            var scoreModeLength = Enum.GetNames(typeof(ScoreMode)).Length;
            ScoreMode scoreMode = (ScoreMode)Random().Next(scoreModeLength);
            if (VERBOSE)
            {
                Console.WriteLine("scoreMode=" + scoreMode);
            }

            Query joinQuery;
            if (from)
            {
                joinQuery = JoinUtil.CreateJoinQuery("from", multipleValuesPerDocument, "to", actualQuery, indexSearcher, scoreMode);
            }
            else
            {
                joinQuery = JoinUtil.CreateJoinQuery("to", multipleValuesPerDocument, "from", actualQuery, indexSearcher, scoreMode);
            }
            if (VERBOSE)
            {
                Console.WriteLine("joinQuery=" + joinQuery);
            }

            // Need to know all documents that have matches. TopDocs doesn't give me that and then I'd be also testing TopDocsCollector...
            FixedBitSet actualResult = new FixedBitSet(indexSearcher.IndexReader.MaxDoc);
            TopScoreDocCollector topScoreDocCollector = TopScoreDocCollector.Create(10, false);
            indexSearcher.Search(joinQuery, new CollectorAnonymousInnerClassHelper2(this, scoreDocsInOrder, context, actualResult, topScoreDocCollector));

            // Asserting bit set...
            if (VERBOSE)
            {
                Console.WriteLine("expected cardinality:" + expectedResult.Cardinality());
                DocIdSetIterator iterator = expectedResult.GetIterator();
                for (int doc = iterator.NextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = iterator.NextDoc())
                {
                    Console.WriteLine(string.Format("Expected doc[{0}] with id value {1}", doc, indexSearcher.Doc(doc).Get("id")));
                }
                Console.WriteLine("actual cardinality:" + actualResult.Cardinality());
                iterator = actualResult.GetIterator();
                for (int doc = iterator.NextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = iterator.NextDoc())
                {
                    Console.WriteLine(string.Format("Actual doc[{0}] with id value {1}", doc, indexSearcher.Doc(doc).Get("id")));
                }
            }
            assertEquals(expectedResult, actualResult);

            // Asserting TopDocs...
            TopDocs expectedTopDocs = CreateExpectedTopDocs(randomValue, from, scoreMode, context);
            TopDocs actualTopDocs = topScoreDocCollector.TopDocs();
            assertEquals(expectedTopDocs.TotalHits, actualTopDocs.TotalHits);
            assertEquals(expectedTopDocs.ScoreDocs.Length, actualTopDocs.ScoreDocs.Length);
            if (scoreMode == ScoreMode.None)
            {
                continue;
            }
            assertEquals(expectedTopDocs.MaxScore, actualTopDocs.MaxScore, 0.0f);
            for (int i = 0; i < expectedTopDocs.ScoreDocs.Length; i++)
            {
                if (VERBOSE)
                {
                    Console.Write(string.Format("Expected doc: {0} | Actual doc: {1}\n", expectedTopDocs.ScoreDocs[i].Doc, actualTopDocs.ScoreDocs[i].Doc));
                    Console.Write(string.Format("Expected score: {0} | Actual score: {1}\n", expectedTopDocs.ScoreDocs[i].Score, actualTopDocs.ScoreDocs[i].Score));
                }
                assertEquals(expectedTopDocs.ScoreDocs[i].Doc, actualTopDocs.ScoreDocs[i].Doc);
                assertEquals(expectedTopDocs.ScoreDocs[i].Score, actualTopDocs.ScoreDocs[i].Score, 0.0f);
                Explanation explanation = indexSearcher.Explain(joinQuery, expectedTopDocs.ScoreDocs[i].Doc);
                assertEquals(expectedTopDocs.ScoreDocs[i].Score, explanation.Value, 0.0f);
            }
        }
        topLevelReader.Dispose();
        dir.Dispose();
    }
}
/// <summary>
/// Searches for and displays results.
/// </summary>
/// <param name="search">The text to search for. Supports file title and contents.</param>
/// <param name="writeToConsole">Whether or not to write search status to console (errors still report).</param>
public Task<List<string>> SearchFiles(string search, bool writeToConsole = true)
{
    SearchText = search;
    return Task.Run(() =>
    {
        var matches = new List<string>();
        if (!IndexDirectoryExists() && !DirectoryReader.IndexExists(fSDirectory))
        {
            GeneralHelper.WriteToConsole($"No index available! Please unpack game assets and generate an index.\n");
            return matches;
        }

        try
        {
            using (Analyzer analyzer = new CustomAnalyzer())
            using (IndexReader reader = DirectoryReader.Open(fSDirectory))
            {
                IndexSearcher searcher = new IndexSearcher(reader);
                MultiFieldQueryParser queryParser = new MultiFieldQueryParser(LuceneVersion.LUCENE_48, new[] { "title", "body" }, analyzer)
                {
                    AllowLeadingWildcard = true
                };
                Query searchTermQuery = queryParser.Parse('*' + QueryParser.Escape(search.Trim()) + '*');
                BooleanQuery aggregateQuery = new BooleanQuery() { { searchTermQuery, Occur.MUST } };

                if (reader.MaxDoc != 0)
                {
                    var start = DateTime.Now;
                    if (writeToConsole)
                    {
                        GeneralHelper.WriteToConsole("Search started.\n");
                    }

                    // perform search
                    TopDocs topDocs = searcher.Search(aggregateQuery, reader.MaxDoc);

                    if (writeToConsole)
                    {
                        GeneralHelper.WriteToConsole($"Search returned {topDocs.ScoreDocs.Length} results in {TimeSpan.FromTicks(DateTime.Now.Subtract(start).Ticks).TotalMilliseconds} ms\n");
                    }

                    // display results
                    foreach (ScoreDoc scoreDoc in topDocs.ScoreDocs)
                    {
                        float score = scoreDoc.Score;
                        int docId = scoreDoc.Doc;
                        Document doc = searcher.Doc(docId);
                        matches.Add(doc.Get("path"));
                    }
                }
                else
                {
                    GeneralHelper.WriteToConsole("No documents available. Please generate the index again.\n");
                }
            }
        }
        catch
        {
            // Checking if the index is corrupt is slower than just letting it fail
            GeneralHelper.WriteToConsole($"Available index is corrupt. Please rerun the indexer to create a new one.\n");
        }
        return matches.OrderBy(m => m).ToList();
    });
}
// Collections.synchronizedMap(new WeakHashMap<SegmentCoreReaders, bool?>());
public virtual void RunTest(string testName)
{
    Failed.Set(false);
    AddCount.Set(0);
    DelCount.Set(0);
    PackCount.Set(0);

    DateTime t0 = DateTime.UtcNow;

    Random random = new Random(Random().Next());
    LineFileDocs docs = new LineFileDocs(random, DefaultCodecSupportsDocValues());
    DirectoryInfo tempDir = CreateTempDir(testName);
    Dir = GetDirectory(NewMockFSDirectory(tempDir)); // some subclasses rely on this being MDW
    if (Dir is BaseDirectoryWrapper)
    {
        ((BaseDirectoryWrapper)Dir).CheckIndexOnClose = false; // don't double-checkIndex, we do it ourselves.
    }
    MockAnalyzer analyzer = new MockAnalyzer(Random());
    analyzer.MaxTokenLength = TestUtil.NextInt(Random(), 1, IndexWriter.MAX_TERM_LENGTH);
    IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetInfoStream(new FailOnNonBulkMergesInfoStream());

    if (LuceneTestCase.TEST_NIGHTLY)
    {
        // newIWConfig makes smallish max seg size, which
        // results in tons and tons of segments for this test
        // when run nightly:
        MergePolicy mp = conf.MergePolicy;
        if (mp is TieredMergePolicy)
        {
            ((TieredMergePolicy)mp).MaxMergedSegmentMB = 5000.0;
        }
        else if (mp is LogByteSizeMergePolicy)
        {
            ((LogByteSizeMergePolicy)mp).MaxMergeMB = 1000.0;
        }
        else if (mp is LogMergePolicy)
        {
            ((LogMergePolicy)mp).MaxMergeDocs = 100000;
        }
    }

    conf.SetMergedSegmentWarmer(new IndexReaderWarmerAnonymousInnerClassHelper(this));

    if (VERBOSE)
    {
        conf.InfoStream = new PrintStreamInfoStreamAnonymousInnerClassHelper(this, Console.Out);
    }

    Writer = new IndexWriter(Dir, conf);
    TestUtil.ReduceOpenFiles(Writer);

    TaskScheduler es = Random().NextBoolean() ? null : TaskScheduler.Default;

    DoAfterWriter(es);

    int NUM_INDEX_THREADS = TestUtil.NextInt(Random(), 2, 4);
    int RUN_TIME_SEC = LuceneTestCase.TEST_NIGHTLY ? 300 : RANDOM_MULTIPLIER;

    ISet<string> delIDs = new ConcurrentHashSet<string>(new HashSet<string>());
    ISet<string> delPackIDs = new ConcurrentHashSet<string>(new HashSet<string>());
    IList<SubDocs> allSubDocs = new SynchronizedCollection<SubDocs>();

    DateTime stopTime = DateTime.UtcNow.AddSeconds(RUN_TIME_SEC);

    ThreadClass[] indexThreads = LaunchIndexingThreads(docs, NUM_INDEX_THREADS, stopTime, delIDs, delPackIDs, allSubDocs);

    if (VERBOSE)
    {
        Console.WriteLine("TEST: DONE start " + NUM_INDEX_THREADS + " indexing threads [" + (DateTime.UtcNow - t0).TotalMilliseconds + " ms]");
    }

    // Let index build up a bit
    Thread.Sleep(100);

    DoSearching(es, stopTime);

    if (VERBOSE)
    {
        Console.WriteLine("TEST: all searching done [" + (DateTime.UtcNow - t0).TotalMilliseconds + " ms]");
    }

    for (int thread = 0; thread < indexThreads.Length; thread++)
    {
        indexThreads[thread].Join();
    }

    if (VERBOSE)
    {
        Console.WriteLine("TEST: done join indexing threads [" + (DateTime.UtcNow - t0).TotalMilliseconds + " ms]; addCount=" + AddCount + " delCount=" + DelCount);
    }

    IndexSearcher s = FinalSearcher;
    if (VERBOSE)
    {
        Console.WriteLine("TEST: finalSearcher=" + s);
    }

    Assert.IsFalse(Failed.Get());

    bool doFail = false;

    // Verify: make sure delIDs are in fact deleted:
    foreach (string id in delIDs)
    {
        TopDocs hits = s.Search(new TermQuery(new Term("docid", id)), 1);
        if (hits.TotalHits != 0)
        {
            Console.WriteLine("doc id=" + id + " is supposed to be deleted, but got " + hits.TotalHits + " hits; first docID=" + hits.ScoreDocs[0].Doc);
            doFail = true;
        }
    }

    // Verify: make sure delPackIDs are in fact deleted:
    foreach (string id in delPackIDs)
    {
        TopDocs hits = s.Search(new TermQuery(new Term("packID", id)), 1);
        if (hits.TotalHits != 0)
        {
            Console.WriteLine("packID=" + id + " is supposed to be deleted, but got " + hits.TotalHits + " matches");
            doFail = true;
        }
    }

    // Verify: make sure each group of sub-docs are still in docID order:
    foreach (SubDocs subDocs in allSubDocs.ToList())
    {
        TopDocs hits = s.Search(new TermQuery(new Term("packID", subDocs.PackID)), 20);
        if (!subDocs.Deleted)
        {
            // We sort by relevance but the scores should be identical so sort falls back to by docID:
            if (hits.TotalHits != subDocs.SubIDs.Count)
            {
                Console.WriteLine("packID=" + subDocs.PackID + ": expected " + subDocs.SubIDs.Count + " hits but got " + hits.TotalHits);
                doFail = true;
            }
            else
            {
                int lastDocID = -1;
                int startDocID = -1;
                foreach (ScoreDoc scoreDoc in hits.ScoreDocs)
                {
                    int docID = scoreDoc.Doc;
                    if (lastDocID != -1)
                    {
                        Assert.AreEqual(1 + lastDocID, docID);
                    }
                    else
                    {
                        startDocID = docID;
                    }
                    lastDocID = docID;
                    Document doc = s.Doc(docID);
                    Assert.AreEqual(subDocs.PackID, doc.Get("packID"));
                }

                lastDocID = startDocID - 1;
                foreach (string subID in subDocs.SubIDs)
                {
                    hits = s.Search(new TermQuery(new Term("docid", subID)), 1);
                    Assert.AreEqual(1, hits.TotalHits);
                    int docID = hits.ScoreDocs[0].Doc;
                    if (lastDocID != -1)
                    {
                        Assert.AreEqual(1 + lastDocID, docID);
                    }
                    lastDocID = docID;
                }
            }
        }
        else
        {
            // Pack was deleted -- make sure its docs are
            // deleted.  We can't verify packID is deleted
            // because we can re-use packID for update:
            foreach (string subID in subDocs.SubIDs)
            {
                Assert.AreEqual(0, s.Search(new TermQuery(new Term("docid", subID)), 1).TotalHits);
            }
        }
    }

    // Verify: make sure all not-deleted docs are in fact
    // not deleted:
    int endID = Convert.ToInt32(docs.NextDoc().Get("docid"));
    docs.Dispose();

    for (int id = 0; id < endID; id++)
    {
        string stringID = "" + id;
        if (!delIDs.Contains(stringID))
        {
            TopDocs hits = s.Search(new TermQuery(new Term("docid", stringID)), 1);
            if (hits.TotalHits != 1)
            {
                Console.WriteLine("doc id=" + stringID + " is not supposed to be deleted, but got hitCount=" + hits.TotalHits + "; delIDs=" + string.Join(",", delIDs.ToArray()));
                doFail = true;
            }
        }
    }
    Assert.IsFalse(doFail);

    Assert.AreEqual(AddCount.Get() - DelCount.Get(), s.IndexReader.NumDocs, "index=" + Writer.SegString() + " addCount=" + AddCount + " delCount=" + DelCount);
    ReleaseSearcher(s);

    Writer.Commit();

    Assert.AreEqual(AddCount.Get() - DelCount.Get(), Writer.NumDocs(), "index=" + Writer.SegString() + " addCount=" + AddCount + " delCount=" + DelCount);

    DoClose();
    Writer.Dispose(false);

    // Cannot shutdown until after writer is closed because
    // writer has merged segment warmer that uses IS to run
    // searches, and that IS may be using this es!
    /*if (es != null)
    {
        es.shutdown();
        es.awaitTermination(1, TimeUnit.SECONDS);
    }*/

    TestUtil.CheckIndex(Dir);
    Dir.Dispose();
    System.IO.Directory.Delete(tempDir.FullName, true);

    if (VERBOSE)
    {
        Console.WriteLine("TEST: done [" + (DateTime.UtcNow - t0).TotalMilliseconds + " ms]");
    }
}
private static void DocumentCountImpl(JsonWriter jsonWriter, IndexSearcher searcher, Query query)
{
    TopDocs topDocs = searcher.Search(query, 1);
    ResponseFormatter.WriteV2CountResult(jsonWriter, topDocs.TotalHits);
}
public virtual void TestNRTAndCommit()
{
    Directory dir = NewDirectory();
    NRTCachingDirectory cachedDir = new NRTCachingDirectory(dir, 2.0, 25.0);
    MockAnalyzer analyzer = new MockAnalyzer(Random);
    analyzer.MaxTokenLength = TestUtil.NextInt32(Random, 1, IndexWriter.MAX_TERM_LENGTH);
    IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
    RandomIndexWriter w = new RandomIndexWriter(Random, cachedDir, conf);
    LineFileDocs docs = new LineFileDocs(Random, DefaultCodecSupportsDocValues);
    int numDocs = TestUtil.NextInt32(Random, 100, 400);
    if (VERBOSE)
    {
        Console.WriteLine("TEST: numDocs=" + numDocs);
    }

    IList<BytesRef> ids = new List<BytesRef>();
    DirectoryReader r = null;
    for (int docCount = 0; docCount < numDocs; docCount++)
    {
        Document doc = docs.NextDoc();
        ids.Add(new BytesRef(doc.Get("docid")));
        w.AddDocument(doc);
        if (Random.Next(20) == 17)
        {
            if (r == null)
            {
                r = DirectoryReader.Open(w.IndexWriter, false);
            }
            else
            {
                DirectoryReader r2 = DirectoryReader.OpenIfChanged(r);
                if (r2 != null)
                {
                    r.Dispose();
                    r = r2;
                }
            }
            Assert.AreEqual(1 + docCount, r.NumDocs);
            IndexSearcher s = NewSearcher(r);
            // Just make sure search can run; we can't assert
            // totHits since it could be 0
            TopDocs hits = s.Search(new TermQuery(new Term("body", "the")), 10);
            // System.out.println("tot hits " + hits.totalHits);
        }
    }

    if (r != null)
    {
        r.Dispose();
    }

    // Close should force cache to clear since all files are sync'd
    w.Dispose();

    string[] cachedFiles = cachedDir.ListCachedFiles();
    foreach (string file in cachedFiles)
    {
        Console.WriteLine("FAIL: cached file " + file + " remains after sync");
    }
    Assert.AreEqual(0, cachedFiles.Length);

    r = DirectoryReader.Open(dir);
    foreach (BytesRef id in ids)
    {
        Assert.AreEqual(1, r.DocFreq(new Term("docid", id)));
    }
    r.Dispose();
    cachedDir.Dispose();
    docs.Dispose();
}
/// <summary>
/// Executes the given query and builds a SearchResult with header metadata and result rows.
/// </summary>
/// <param name="query">The Lucene query to execute.</param>
/// <param name="headerItemXmlNodeList">XML nodes describing the header items to include.</param>
/// <returns>The populated SearchResult.</returns>
public static SearchResult search(Query query, List<XmlNode> headerItemXmlNodeList)
{
    // Use the dataset count as the maximum number of hits to fetch; fall back to 1000.
    int n = 0;
    DatasetManager dm = null;
    try
    {
        dm = new DatasetManager();
        n = dm.DatasetRepo.Get().Count;
        if (n == 0)
            n = 1000;
    }
    catch
    {
        n = 1000;
    }
    finally
    {
        if (dm != null)
            dm.Dispose(); // dm is null if the constructor threw
        if (n <= 0)
            n = 1000;
    }

    TopDocs docs = searcher.Search(query, n);
    SearchResult sro = new SearchResult();
    sro.PageSize = 10;
    sro.CurrentPage = 1;
    sro.NumberOfHits = 100;

    List<HeaderItem> Header = new List<HeaderItem>();
    List<HeaderItem> DefaultHeader = new List<HeaderItem>();

    // create id
    HeaderItem id = new HeaderItem();
    id.DisplayName = "ID";
    id.Name = "ID";
    id.DataType = "Integer";
    sro.Id = id;
    Header.Add(id);
    DefaultHeader.Add(id);

    // create entity
    HeaderItem entity = new HeaderItem();
    entity.DisplayName = "Type";
    entity.Name = "entity_name";
    entity.DataType = "string";
    Header.Add(entity);
    //DefaultHeader.Add(entity);

    foreach (XmlNode ade in headerItemXmlNodeList)
    {
        HeaderItem hi = new HeaderItem();
        hi.Name = ade.Attributes.GetNamedItem("lucene_name").Value;
        hi.DisplayName = ade.Attributes.GetNamedItem("display_name").Value;
        Header.Add(hi);
        if (ade.Attributes.GetNamedItem("default_visible_item").Value.ToLower().Equals("yes"))
        {
            DefaultHeader.Add(hi);
        }
    }

    List<Row> RowList = new List<Row>();
    string valueLastEntity = "";          // stores the entity value of the previous row
    bool moreThanOneEntityFound = false;  // set when more than one entity name is found

    foreach (ScoreDoc sd in docs.ScoreDocs)
    {
        Document doc = searcher.Doc(sd.Doc);
        Row r = new Row();
        List<object> ValueList = new List<object>();
        ValueList.Add(doc.Get("doc_id"));
        ValueList.Add(doc.Get("gen_entity_name"));

        // check if there is more than one entity in the result list
        if (moreThanOneEntityFound == false && ValueList[1].ToString() != valueLastEntity && valueLastEntity != "")
        {
            moreThanOneEntityFound = true;
        }
        valueLastEntity = ValueList[1].ToString();

        foreach (XmlNode ade in headerItemXmlNodeList)
        {
            String fieldType = ade.Attributes.GetNamedItem("type").Value;
            String luceneName = ade.Attributes.GetNamedItem("lucene_name").Value;
            if (fieldType.ToLower().Equals("facet_field"))
            {
                luceneName = "facet_" + luceneName;
            }
            else if (fieldType.ToLower().Equals("category_field"))
            {
                luceneName = "category_" + luceneName;
            }
            else if (fieldType.ToLower().Equals("property_field"))
            {
                luceneName = "property_" + luceneName;
            }
            ValueList.Add(doc.Get(luceneName));
        }
        r.Values = ValueList;
        RowList.Add(r);
    }

    // show the entity column if more than one entity was found
    if (moreThanOneEntityFound)
    {
        DefaultHeader.Add(entity);
    }

    sro.Header = Header;
    sro.DefaultVisibleHeaderItem = DefaultHeader;
    sro.Rows = RowList;
    return sro;
}
public async Task<IActionResult> Index(string id, string q, PagerParameters pagerParameters)
{
    var siteSettings = await _siteService.GetSiteSettingsAsync();
    Pager pager = new Pager(pagerParameters, siteSettings.PageSize);
    var indexName = "Search";

    if (!String.IsNullOrWhiteSpace(id))
    {
        indexName = id;
    }

    if (!_luceneIndexProvider.Exists(indexName))
    {
        return NotFound();
    }

    if (String.IsNullOrWhiteSpace(q))
    {
        return View(new SearchIndexViewModel
        {
            Pager = pager,
            IndexName = id,
            ContentItems = Enumerable.Empty<ContentItem>()
        });
    }

    var luceneSettings = await _luceneIndexingService.GetLuceneSettingsAsync();

    if (luceneSettings == null)
    {
        Logger.LogInformation("Couldn't execute search. No Lucene settings were defined.");
        return View(new SearchIndexViewModel
        {
            HasMoreResults = false,
            Query = q,
            Pager = pager,
            IndexName = id,
            ContentItems = Enumerable.Empty<ContentItem>()
        });
    }

    var queryParser = new MultiFieldQueryParser(LuceneSettings.DefaultVersion, luceneSettings.SearchFields, new StandardAnalyzer(LuceneSettings.DefaultVersion));
    var query = queryParser.Parse(QueryParser.Escape(q));

    List<int> contentItemIds = new List<int>();

    _luceneIndexProvider.Search(indexName, searcher =>
    {
        // Fetch one more result than PageSize to generate "More" links
        TopScoreDocCollector collector = TopScoreDocCollector.Create(pager.PageSize + 1, true);
        searcher.Search(query, collector);
        TopDocs hits = collector.TopDocs(pager.GetStartIndex(), pager.PageSize + 1);

        foreach (var hit in hits.ScoreDocs)
        {
            var d = searcher.Doc(hit.Doc, IdSet);
            contentItemIds.Add(Convert.ToInt32(d.GetField("ContentItemId").StringValue));
        }
    });

    var contentItems = new List<ContentItem>();
    foreach (var contentItemId in contentItemIds.Take(pager.PageSize))
    {
        var contentItem = await _contentManager.GetAsync(contentItemId);
        if (contentItem != null)
        {
            contentItems.Add(contentItem);
        }
    }

    var model = new SearchIndexViewModel
    {
        HasMoreResults = contentItemIds.Count > pager.PageSize,
        Query = q,
        Pager = pager,
        IndexName = id,
        ContentItems = contentItems
    };

    return View(model);
}
public virtual void TestRandomStoredFields()
{
    Directory dir = NewDirectory();
    Random rand = Random();
    RandomIndexWriter w = new RandomIndexWriter(rand, dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMaxBufferedDocs(TestUtil.NextInt(rand, 5, 20)));
    //w.w.setNoCFSRatio(0.0);
    int docCount = AtLeast(200);
    int fieldCount = TestUtil.NextInt(rand, 1, 5);

    IList<int?> fieldIDs = new List<int?>();

    FieldType customType = new FieldType(TextField.TYPE_STORED);
    customType.Tokenized = false;
    Field idField = NewField("id", "", customType);

    for (int i = 0; i < fieldCount; i++)
    {
        fieldIDs.Add(i);
    }

    IDictionary<string, Document> docs = new Dictionary<string, Document>();

    if (VERBOSE)
    {
        Console.WriteLine("TEST: build index docCount=" + docCount);
    }

    FieldType customType2 = new FieldType();
    customType2.Stored = true;
    for (int i = 0; i < docCount; i++)
    {
        Document doc = new Document();
        doc.Add(idField);
        string id = "" + i;
        idField.StringValue = id;
        docs[id] = doc;
        if (VERBOSE)
        {
            Console.WriteLine("TEST: add doc id=" + id);
        }

        foreach (int field in fieldIDs)
        {
            string s;
            if (rand.Next(4) != 3)
            {
                s = TestUtil.RandomUnicodeString(rand, 1000);
                doc.Add(NewField("f" + field, s, customType2));
            }
            else
            {
                s = null;
            }
        }
        w.AddDocument(doc);
        if (rand.Next(50) == 17)
        {
            // mixup binding of field name -> Number every so often
            fieldIDs = CollectionsHelper.Shuffle(fieldIDs);
        }
        if (rand.Next(5) == 3 && i > 0)
        {
            string delID = "" + rand.Next(i);
            if (VERBOSE)
            {
                Console.WriteLine("TEST: delete doc id=" + delID);
            }
            w.DeleteDocuments(new Term("id", delID));
            docs.Remove(delID);
        }
    }

    if (VERBOSE)
    {
        Console.WriteLine("TEST: " + docs.Count + " docs in index; now load fields");
    }
    if (docs.Count > 0)
    {
        string[] idsList = docs.Keys.ToArray();

        for (int x = 0; x < 2; x++)
        {
            IndexReader r = w.Reader;
            IndexSearcher s = NewSearcher(r);

            if (VERBOSE)
            {
                Console.WriteLine("TEST: cycle x=" + x + " r=" + r);
            }

            int num = AtLeast(1000);
            for (int iter = 0; iter < num; iter++)
            {
                string testID = idsList[rand.Next(idsList.Length)];
                if (VERBOSE)
                {
                    Console.WriteLine("TEST: test id=" + testID);
                }
                TopDocs hits = s.Search(new TermQuery(new Term("id", testID)), 1);
                Assert.AreEqual(1, hits.TotalHits);
                Document doc = r.Document(hits.ScoreDocs[0].Doc);
                Document docExp = docs[testID];
                for (int i = 0; i < fieldCount; i++)
                {
                    Assert.AreEqual(docExp.Get("f" + i), doc.Get("f" + i), "doc " + testID + ", field f" + i + " is wrong");
                }
            }
            r.Dispose();
            w.ForceMerge(1);
        }
    }
    w.Dispose();
    dir.Dispose();
}
public void TestBigDocuments()
{
    // "big" as "much bigger than the chunk size"
    // for this test we force a FS dir
    // we can't just use newFSDirectory, because this test doesn't really index anything.
    // so if we get NRTCachingDir+SimpleText, we make massive stored fields and OOM (LUCENE-4484)
    Directory dir = new MockDirectoryWrapper(Random(), new MMapDirectory(CreateTempDir("testBigDocuments")));
    IndexWriterConfig iwConf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
    iwConf.SetMaxBufferedDocs(RandomInts.NextIntBetween(Random(), 2, 30));
    RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, iwConf);

    if (dir is MockDirectoryWrapper)
    {
        ((MockDirectoryWrapper)dir).Throttling = MockDirectoryWrapper.Throttling_e.NEVER;
    }

    Document emptyDoc = new Document(); // emptyDoc
    Document bigDoc1 = new Document();  // lot of small fields
    Document bigDoc2 = new Document();  // 1 very big field

    Field idField = new StringField("id", "", Field.Store.NO);
    emptyDoc.Add(idField);
    bigDoc1.Add(idField);
    bigDoc2.Add(idField);

    FieldType onlyStored = new FieldType(StringField.TYPE_STORED);
    onlyStored.Indexed = false;

    Field smallField = new Field("fld", RandomByteArray(Random().Next(10), 256), onlyStored);
    int numFields = RandomInts.NextIntBetween(Random(), 500000, 1000000);
    for (int i = 0; i < numFields; ++i)
    {
        bigDoc1.Add(smallField);
    }

    Field bigField = new Field("fld", RandomByteArray(RandomInts.NextIntBetween(Random(), 1000000, 5000000), 2), onlyStored);
    bigDoc2.Add(bigField);

    int numDocs = AtLeast(5);
    Document[] docs = new Document[numDocs];
    for (int i = 0; i < numDocs; ++i)
    {
        docs[i] = RandomInts.RandomFrom(Random(), Arrays.AsList(emptyDoc, bigDoc1, bigDoc2));
    }
    for (int i = 0; i < numDocs; ++i)
    {
        idField.StringValue = "" + i;
        iw.AddDocument(docs[i]);
        if (Random().Next(numDocs) == 0)
        {
            iw.Commit();
        }
    }
    iw.Commit();
    iw.ForceMerge(1); // look at what happens when big docs are merged

    DirectoryReader rd = DirectoryReader.Open(dir);
    IndexSearcher searcher = new IndexSearcher(rd);
    for (int i = 0; i < numDocs; ++i)
    {
        Query query = new TermQuery(new Term("id", "" + i));
        TopDocs topDocs = searcher.Search(query, 1);
        Assert.AreEqual(1, topDocs.TotalHits, "" + i);
        Document doc = rd.Document(topDocs.ScoreDocs[0].Doc);
        Assert.IsNotNull(doc);
        IndexableField[] fieldValues = doc.GetFields("fld");
        Assert.AreEqual(docs[i].GetFields("fld").Length, fieldValues.Length);
        if (fieldValues.Length > 0)
        {
            Assert.AreEqual(docs[i].GetFields("fld")[0].BinaryValue(), fieldValues[0].BinaryValue());
        }
    }
    rd.Dispose();
    iw.Dispose();
    dir.Dispose();
}
/// <summary>
/// Searches the specified phrase in the specified search fields.
/// </summary>
/// <param name="wiki">The wiki.</param>
/// <param name="searchFields">The search fields.</param>
/// <param name="phrase">The phrase to search.</param>
/// <param name="searchOption">The search options.</param>
/// <returns>A list of <see cref="SearchResult"/> items.</returns>
public static List<SearchResult> Search(string wiki, SearchField[] searchFields, string phrase, SearchOptions searchOption)
{
    IIndexDirectoryProviderV60 indexDirectoryProvider = Collectors.CollectorsBox.GetIndexDirectoryProvider(wiki);
    Analyzer analyzer = new SimpleAnalyzer();
    IndexSearcher searcher = new IndexSearcher(indexDirectoryProvider.GetDirectory(), false);

    string[] searchFieldsAsString = (from f in searchFields select f.AsString()).ToArray();
    MultiFieldQueryParser queryParser = new MultiFieldQueryParser(Lucene.Net.Util.Version.LUCENE_30, searchFieldsAsString, analyzer);

    if (searchOption == SearchOptions.AllWords)
    {
        queryParser.DefaultOperator = QueryParser.Operator.AND;
    }
    if (searchOption == SearchOptions.AtLeastOneWord)
    {
        queryParser.DefaultOperator = QueryParser.Operator.OR;
    }
    if (searchOption == SearchOptions.ExactPhrase)
    {
        phrase = String.Format("\"{0}\"", phrase);
    }

    try
    {
        Query query = queryParser.Parse(phrase);
        TopDocs topDocs = searcher.Search(query, 100);

        Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter("<b class=\"searchkeyword\">", "</b>"), new QueryScorer(query));

        List<SearchResult> searchResults = new List<SearchResult>(topDocs.TotalHits);
        for (int i = 0; i < Math.Min(100, topDocs.TotalHits); i++)
        {
            Document doc = searcher.Doc(topDocs.ScoreDocs[i].Doc);

            SearchResult result = new SearchResult();
            result.DocumentType = DocumentTypeFromString(doc.GetField(SearchField.DocumentType.AsString()).StringValue);
            result.Relevance = topDocs.ScoreDocs[i].Score * 100;

            switch (result.DocumentType)
            {
                case DocumentType.Page:
                    PageDocument page = new PageDocument();
                    page.Wiki = doc.GetField(SearchField.Wiki.AsString()).StringValue;
                    page.PageFullName = doc.GetField(SearchField.PageFullName.AsString()).StringValue;
                    page.Title = doc.GetField(SearchField.Title.AsString()).StringValue;
                    TokenStream tokenStream1 = analyzer.TokenStream(SearchField.Title.AsString(), new StringReader(page.Title));
                    page.HighlightedTitle = highlighter.GetBestFragments(tokenStream1, page.Title, 3, " [...] ");
                    page.Content = doc.GetField(SearchField.Content.AsString()).StringValue;
                    tokenStream1 = analyzer.TokenStream(SearchField.Content.AsString(), new StringReader(page.Content));
                    page.HighlightedContent = highlighter.GetBestFragments(tokenStream1, page.Content, 3, " [...] ");
                    result.Document = page;
                    break;
                case DocumentType.Message:
                    MessageDocument message = new MessageDocument();
                    message.Wiki = doc.GetField(SearchField.Wiki.AsString()).StringValue;
                    message.PageFullName = doc.GetField(SearchField.PageFullName.AsString()).StringValue;
                    message.DateTime = DateTime.Parse(doc.GetField(SearchField.MessageDateTime.AsString()).StringValue);
                    message.Subject = doc.GetField(SearchField.Title.AsString()).StringValue;
                    message.Body = doc.GetField(SearchField.Content.AsString()).StringValue;
                    TokenStream tokenStream2 = analyzer.TokenStream(SearchField.Content.AsString(), new StringReader(message.Body));
                    message.HighlightedBody = highlighter.GetBestFragments(tokenStream2, message.Body, 3, " [...] ");
                    result.Document = message;
                    break;
                case DocumentType.Attachment:
                    PageAttachmentDocument attachment = new PageAttachmentDocument();
                    attachment.Wiki = doc.GetField(SearchField.Wiki.AsString()).StringValue;
                    attachment.PageFullName = doc.GetField(SearchField.PageFullName.AsString()).StringValue;
                    attachment.FileName = doc.GetField(SearchField.FileName.AsString()).StringValue;
                    attachment.FileContent = doc.GetField(SearchField.FileContent.AsString()).StringValue;
                    TokenStream tokenStream3 = analyzer.TokenStream(SearchField.Content.AsString(), new StringReader(attachment.FileContent));
                    attachment.HighlightedFileContent = highlighter.GetBestFragments(tokenStream3, attachment.FileContent, 3, " [...] ");
                    result.Document = attachment;
                    break;
                case DocumentType.File:
                    FileDocument file = new FileDocument();
                    file.Wiki = doc.GetField(SearchField.Wiki.AsString()).StringValue;
                    file.FileName = doc.GetField(SearchField.FileName.AsString()).StringValue;
                    file.FileContent = doc.GetField(SearchField.FileContent.AsString()).StringValue;
                    TokenStream tokenStream4 = analyzer.TokenStream(SearchField.Content.AsString(), new StringReader(file.FileContent));
                    file.HighlightedFileContent = highlighter.GetBestFragments(tokenStream4, file.FileContent, 3, " [...]");
                    result.Document = file;
                    break;
            }
            searchResults.Add(result);
        }
        searcher.Dispose();
        return searchResults;
    }
    catch (ParseException)
    {
        return new List<SearchResult>(0);
    }
}
public void TestRegexQuery()
{
    const int maxNumFragmentsRequired = 2;

    query = new RegexQuery(new Term(FIELD_NAME, "ken.*"));
    searcher = new IndexSearcher(ramDir, true);
    hits = searcher.Search(query, 100);

    var scorer = new QueryScorer(query, FIELD_NAME);
    var highlighter = new Highlighter(this, scorer);

    for (int i = 0; i < hits.TotalHits; i++)
    {
        String text = searcher.Doc(hits.ScoreDocs[i].Doc).Get(FIELD_NAME);
        TokenStream tokenStream = analyzer.TokenStream(FIELD_NAME, new StringReader(text));

        highlighter.TextFragmenter = new SimpleFragmenter(40);

        String result = highlighter.GetBestFragments(tokenStream, text, maxNumFragmentsRequired, "...");
        Console.WriteLine("\t" + result);
    }

    Assert.IsTrue(numHighlights == 5, "Failed to find correct number of highlights " + numHighlights + " found");
}
/// <summary>
/// Executes the given query and builds a SearchResult with header metadata and result rows.
/// </summary>
/// <param name="query">The Lucene query to execute.</param>
/// <param name="headerItemXmlNodeList">XML nodes describing the header items to include.</param>
/// <returns>The populated SearchResult.</returns>
public static SearchResult search(Query query, List<XmlNode> headerItemXmlNodeList)
{
    TopDocs docs = searcher.Search(query, 1000);
    SearchResult sro = new SearchResult();
    sro.PageSize = 10;
    sro.CurrentPage = 1;
    sro.NumberOfHits = 100;

    List<HeaderItem> Header = new List<HeaderItem>();
    List<HeaderItem> DefaultHeader = new List<HeaderItem>();

    // create id
    HeaderItem id = new HeaderItem();
    id.DisplayName = "ID";
    id.Name = "ID";
    id.DataType = "Integer";
    sro.Id = id;
    Header.Add(id);
    DefaultHeader.Add(id);

    foreach (XmlNode ade in headerItemXmlNodeList)
    {
        HeaderItem hi = new HeaderItem();
        hi.Name = ade.Attributes.GetNamedItem("lucene_name").Value;
        hi.DisplayName = ade.Attributes.GetNamedItem("display_name").Value;
        Header.Add(hi);
        if (ade.Attributes.GetNamedItem("default_visible_item").Value.ToLower().Equals("yes"))
        {
            DefaultHeader.Add(hi);
        }
    }

    List<Row> RowList = new List<Row>();
    foreach (ScoreDoc sd in docs.ScoreDocs)
    {
        Document doc = searcher.Doc(sd.Doc);
        Row r = new Row();
        List<object> ValueList = new List<object>();
        ValueList.Add(doc.Get("doc_id"));

        foreach (XmlNode ade in headerItemXmlNodeList)
        {
            String fieldType = ade.Attributes.GetNamedItem("type").Value;
            String luceneName = ade.Attributes.GetNamedItem("lucene_name").Value;
            if (fieldType.ToLower().Equals("facet_field"))
            {
                luceneName = "facet_" + luceneName;
            }
            else if (fieldType.ToLower().Equals("category_field"))
            {
                luceneName = "category_" + luceneName;
            }
            else if (fieldType.ToLower().Equals("property_field"))
            {
                luceneName = "property_" + luceneName;
            }
            ValueList.Add(doc.Get(luceneName));
        }
        r.Values = ValueList;
        RowList.Add(r);
    }

    sro.Header = Header;
    sro.DefaultVisibleHeaderItem = DefaultHeader;
    sro.Rows = RowList;
    return sro;
}
public void TestConstantScoreMultiTermQuery()
{
    numHighlights = 0;

    query = new WildcardQuery(new Term(FIELD_NAME, "ken*"));
    ((WildcardQuery)query).RewriteMethod = MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE;
    searcher = new IndexSearcher(ramDir, true);
    // can't rewrite ConstantScore if you want to highlight it -
    // it rewrites to ConstantScoreQuery which cannot be highlighted
    // query = unReWrittenQuery.Rewrite(reader);
    Console.WriteLine("Searching for: " + query.ToString(FIELD_NAME));
    hits = searcher.Search(query, null, 1000);

    for (int i = 0; i < hits.TotalHits; i++)
    {
        String text = searcher.Doc(hits.ScoreDocs[i].Doc).Get(HighlighterTest.FIELD_NAME);
        int maxNumFragmentsRequired = 2;
        String fragmentSeparator = "...";
        TokenStream tokenStream = analyzer.TokenStream(FIELD_NAME, new StringReader(text));
        QueryScorer scorer = new QueryScorer(query, FIELD_NAME);

        Highlighter highlighter = new Highlighter(this, scorer);
        highlighter.TextFragmenter = new SimpleFragmenter(20);

        String result = highlighter.GetBestFragments(tokenStream, text, maxNumFragmentsRequired, fragmentSeparator);
        Console.WriteLine("\t" + result);
    }
    Assert.IsTrue(numHighlights == 5, "Failed to find correct number of highlights " + numHighlights + " found");

    // try null field
    hits = searcher.Search(query, null, 1000);
    numHighlights = 0;
    for (int i = 0; i < hits.TotalHits; i++)
    {
        String text = searcher.Doc(hits.ScoreDocs[i].Doc).Get(HighlighterTest.FIELD_NAME);
        int maxNumFragmentsRequired = 2;
        String fragmentSeparator = "...";
        TokenStream tokenStream = analyzer.TokenStream(HighlighterTest.FIELD_NAME, new StringReader(text));
        QueryScorer scorer = new QueryScorer(query, null);

        Highlighter highlighter = new Highlighter(this, scorer);
        highlighter.TextFragmenter = new SimpleFragmenter(20);

        String result = highlighter.GetBestFragments(tokenStream, text, maxNumFragmentsRequired, fragmentSeparator);
        Console.WriteLine("\t" + result);
    }
    Assert.IsTrue(numHighlights == 5, "Failed to find correct number of highlights " + numHighlights + " found");

    // try default field
    hits = searcher.Search(query, null, 1000);
    numHighlights = 0;
    for (int i = 0; i < hits.TotalHits; i++)
    {
        String text = searcher.Doc(hits.ScoreDocs[i].Doc).Get(HighlighterTest.FIELD_NAME);
        int maxNumFragmentsRequired = 2;
        String fragmentSeparator = "...";
        TokenStream tokenStream = analyzer.TokenStream(HighlighterTest.FIELD_NAME, new StringReader(text));
        QueryScorer scorer = new QueryScorer(query, "random_field", HighlighterTest.FIELD_NAME);

        Highlighter highlighter = new Highlighter(this, scorer);
        highlighter.TextFragmenter = new SimpleFragmenter(20);

        String result = highlighter.GetBestFragments(tokenStream, text, maxNumFragmentsRequired, fragmentSeparator);
        Console.WriteLine("\t" + result);
    }
    Assert.IsTrue(numHighlights == 5, "Failed to find correct number of highlights " + numHighlights + " found");
}
/// <summary>
/// Runs an n-gram autocomplete search constrained by the given filter field and returns matching text values.
/// </summary>
/// <param name="origQuery">The base query that results must also match.</param>
/// <param name="queryFilter">The field used to constrain the n-gram search.</param>
/// <param name="searchtext">The text typed so far.</param>
/// <returns>A short list of unique autocomplete suggestions.</returns>
public static IEnumerable<TextValue> doTextSearch(Query origQuery, String queryFilter, String searchtext)
{
    String filter = queryFilter;
    BooleanQuery query = new BooleanQuery();
    query.Add(origQuery, Occur.MUST);

    if (!filter.ToLower().StartsWith("ng_"))
    {
        filter = "ng_" + filter;
    }
    if (filter.ToLower().Equals("ng_all"))
    {
        filter = "ng_all";
        queryFilter = "ng_all";
    }

    HashSet<string> uniqueText = new HashSet<string>();
    searchtext = searchtext.ToLower();

    QueryParser parser = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, filter, new KeywordAnalyzer());
    parser.DefaultOperator = QueryParser.Operator.AND;
    Query X1 = parser.Parse(searchtext);
    query.Add(X1, Occur.MUST);
    // Query query = parser.Parse("tree data");

    TopDocs tds = searcher.Search(query, 50);
    QueryScorer scorer = new QueryScorer(query, searchtext);
    Analyzer analyzer = new NGramAnalyzer();
    List<TextValue> autoCompleteTextList = new List<TextValue>();

    foreach (ScoreDoc sd in tds.ScoreDocs)
    {
        Document doc = searcher.Doc(sd.Doc);
        String docId = doc.GetField("doc_id").StringValue;

        TermQuery q1 = new TermQuery(new Term("id", docId.ToLower()));
        TermQuery q0 = new TermQuery(new Term("field", queryFilter.ToLower()));
        QueryParser parser1 = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, "value", new KeywordAnalyzer());
        parser1.DefaultOperator = QueryParser.Operator.AND;
        Query q2 = parser1.Parse(searchtext);

        BooleanQuery q3 = new BooleanQuery();
        q3.Add(q1, Occur.MUST);
        q3.Add(q2, Occur.MUST);
        q3.Add(q0, Occur.MUST);

        TopDocs tdAutoComp = autoCompleteSearcher.Search(q3, 100);
        foreach (ScoreDoc sdAutoComp in tdAutoComp.ScoreDocs)
        {
            Document docAutoComp = autoCompleteSearcher.Doc(sdAutoComp.Doc);
            String toAdd = docAutoComp.GetField("value").StringValue;
            if (!uniqueText.Contains(toAdd))
            {
                TextValue tv = new TextValue();
                tv.Name = toAdd;
                tv.Value = toAdd;
                autoCompleteTextList.Add(tv);
                uniqueText.Add(toAdd);
            }
        }
        if (autoCompleteTextList.Count > 7)
        {
            break;
        }
    }
    return autoCompleteTextList;
}
public void _Search()
{
    //string request = (searchParams as string);
    string old_request = "";
    string new_request = "";

    while (true)
    {
        lock (_request)
        {
            new_request = _request;
        }

        if (new_request != old_request)
        {
            old_request = new_request;
            if (new_request.Length != 0)
            {
                IndexReader reader = null;
                Stopwatch stopWatch = new Stopwatch();
                stopWatch.Start();

                string index_path = Owl.Properties.Settings.Default.IndexPath;
                try
                {
                    reader = IndexReader.Open(index_path);
                }
                catch
                {
                    status.SearchStatus = string.Format("Problems while opening Index: has it been created in {0} ?", Owl.Properties.Settings.Default.IndexPath);
                }

                int nb_docs = 0;
                int found_docs = 0;
                if (reader != null)
                {
                    try
                    {
                        Searcher searcher = new IndexSearcher(reader);
                        Analyzer analyzer = new StandardAnalyzer();
                        //QueryParser parser = new QueryParser("contents", analyzer);
                        MultiFieldQueryParser parser = new MultiFieldQueryParser(new string[] { "contents", "path" }, analyzer);
                        Query query = parser.Parse(new_request);

                        SimpleCall sc = delegate() { resultItems.Clear(); };
                        Application.Current.Dispatcher.Invoke(DispatcherPriority.Background, sc);

                        //Hits hits = searcher.Search(query);
                        TopDocs docs = searcher.Search(query, null, 100);

                        int num_doc = 1;
                        foreach (ScoreDoc score_doc in docs.scoreDocs)
                        {
                            if (HasRequestChanged(new_request))
                            {
                                break;
                            }
                            Document doc = searcher.Doc(score_doc.doc);
                            System.String path = doc.Get("path");

                            SimpleCall sc2 = delegate()
                            {
                                resultItems.Add(new Result(string.Format("{0} - {2} ({1})%\n{3}",
                                    num_doc++,
                                    (int)((score_doc.score * 100) / docs.GetMaxScore()),
                                    System.IO.Path.GetFileName(path),
                                    System.IO.Path.GetDirectoryName(path)), path));
                            };
                            Application.Current.Dispatcher.Invoke(DispatcherPriority.Background, sc2);
                        }

                        found_docs = docs.scoreDocs.Length;
                        nb_docs = reader.NumDocs();
                        searcher.Close();
                    }
                    //catch (TokenMgrError)
                    //{ }
                    catch (Exception e)
                    {
                        status.SearchStatus = string.Format("Problems with request {0} ", new_request);
                        Log.Error(e);
                    }
                    finally
                    {
                        reader.Close();
                        stopWatch.Stop();
                    }
                }

                //---
                status.SearchStatus = string.Format("{0} results for '{3}' in {1} docs (took {2} ms)", found_docs, nb_docs, stopWatch.ElapsedMilliseconds, new_request);
            }
        }
        else
        {
            _mre.Reset();
            _mre.WaitOne();
        }
        //Thread.Sleep(250);
    }
}
private IEnumerable<IAuditEntry> getResults(TopDocs ids, int page, IndexSearcher searcher)
{
    int skip = page * 20;
    return ids.ScoreDocs.Reverse().Skip(skip).Take(20).Select(x => new BasicAuditEntry(searcher.Doc(x.Doc), x.Doc));
}
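For context, a hedged sketch of a caller for the pager above, assuming the caller collects every hit so the Reverse()/Skip() arithmetic has the full hit list to work from (names illustrative, not from the original source):

// Hypothetical caller: newest-first audit page 2, i.e. entries 40..59 counted from the end.
TopDocs hits = searcher.Search(query, searcher.IndexReader.MaxDoc); // assumption: collect all hits
IEnumerable<IAuditEntry> page = getResults(hits, 2, searcher);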
public override void Run()
{
    try
    {
        IndexReader lastReader = null;
        IndexSearcher lastSearcher = null;

        while (operations.DecrementAndGet() >= 0)
        {
            // bias toward a recently changed doc
            int id = rand.Next(100) < 25 ? outerInstance.lastId : rand.Next(ndocs);

            // when indexing, we update the index, then the model
            // so when querying, we should first check the model, and then the index
            long val;
            DirectoryReader r;
            lock (outerInstance)
            {
                val = outerInstance.committedModel[id];
                r = outerInstance.reader;
                r.IncRef();
            }

            if (Verbose)
            {
                Console.WriteLine("TEST: " + Thread.CurrentThread.Name + ": s id=" + id + " val=" + val + " r=" + r.Version);
            }

            // sreq = req("wt","json", "q","id:"+Integer.toString(id), "omitHeader","true");
            IndexSearcher searcher;
            if (r == lastReader)
            {
                // Just re-use lastSearcher, else
                // newSearcher may create too many thread
                // pools (ExecutorService):
                searcher = lastSearcher;
            }
            else
            {
                searcher = NewSearcher(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                    outerInstance,
#endif
                    r);
                lastReader = r;
                lastSearcher = searcher;
            }

            Query q = new TermQuery(new Term("id", Convert.ToString(id)));
            TopDocs results = searcher.Search(q, 10);

            if (results.TotalHits == 0 && tombstones)
            {
                // if we couldn't find the doc, look for its tombstone
                q = new TermQuery(new Term("id", "-" + Convert.ToString(id)));
                results = searcher.Search(q, 1);
                if (results.TotalHits == 0)
                {
                    if (val == -1L)
                    {
                        // expected... no doc was added yet
                        r.DecRef();
                        continue;
                    }
                    Assert.Fail("No documents or tombstones found for id " + id + ", expected at least " + val + " reader=" + r);
                }
            }

            if (results.TotalHits == 0 && !tombstones)
            {
                // nothing to do - we can't tell anything from a deleted doc without tombstones
            }
            else
            {
                // we should have found the document, or its tombstone
                if (results.TotalHits != 1)
                {
                    Console.WriteLine("FAIL: hits id:" + id + " val=" + val);
                    foreach (ScoreDoc sd in results.ScoreDocs)
                    {
                        Document doc = r.Document(sd.Doc);
                        Console.WriteLine("  docID=" + sd.Doc + " id:" + doc.Get("id") + " foundVal=" + doc.Get(outerInstance.field));
                    }
                    Assert.Fail("id=" + id + " reader=" + r + " totalHits=" + results.TotalHits);
                }
                Document doc_ = searcher.Doc(results.ScoreDocs[0].Doc);
                long foundVal = Convert.ToInt64(doc_.Get(outerInstance.field));
                if (foundVal < Math.Abs(val))
                {
                    Assert.Fail("foundVal=" + foundVal + " val=" + val + " id=" + id + " reader=" + r);
                }
            }
            r.DecRef();
        }
    }
    catch (Exception e)
    {
        operations.Value = (int)-1L;
        Console.WriteLine(Thread.CurrentThread.Name + ": FAILED: unexpected exception");
        Console.WriteLine(e.StackTrace);
        throw new Exception(e.Message, e);
    }
}
public void assertFromTestData(int[] codePointTable)
{
    if (VERBOSE)
    {
        Console.WriteLine("TEST: codePointTable=" + codePointTable);
    }
    Stream stream = GetType().getResourceAsStream("fuzzyTestData.txt");
    TextReader reader = new StreamReader(stream, Encoding.UTF8);

    int bits = int.Parse(reader.ReadLine(), CultureInfo.InvariantCulture);
    int terms = (int)Math.Pow(2, bits);

    Store.Directory dir = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random(), MockTokenizer.KEYWORD, false)).SetMergePolicy(NewLogMergePolicy()));

    Document doc = new Document();
    Field field = NewTextField("field", "", Field.Store.NO);
    doc.Add(field);

    for (int i = 0; i < terms; i++)
    {
        field.SetStringValue(MapInt(codePointTable, i));
        writer.AddDocument(doc);
    }

    IndexReader r = writer.Reader;
    IndexSearcher searcher = NewSearcher(r);
    if (VERBOSE)
    {
        Console.WriteLine("TEST: searcher=" + searcher);
    }
    // even though this uses a boost-only rewrite, this test relies upon queryNorm being the default implementation,
    // otherwise scores are different!
    searcher.Similarity = new DefaultSimilarity();

    writer.Dispose();

    String line;
    int lineNum = 0;
    while ((line = reader.ReadLine()) != null)
    {
        lineNum++;
        String[] @params = line.Split(new char[] { ',' }, StringSplitOptions.RemoveEmptyEntries);
        String query = MapInt(codePointTable, int.Parse(@params[0], CultureInfo.InvariantCulture));
        int prefix = int.Parse(@params[1], CultureInfo.InvariantCulture);
        int pqSize = int.Parse(@params[2], CultureInfo.InvariantCulture);
        float minScore = float.Parse(@params[3], CultureInfo.InvariantCulture);
#pragma warning disable 612, 618
        SlowFuzzyQuery q = new SlowFuzzyQuery(new Term("field", query), minScore, prefix);
#pragma warning restore 612, 618
        q.MultiTermRewriteMethod = new MultiTermQuery.TopTermsBoostOnlyBooleanQueryRewrite(pqSize);

        int expectedResults = int.Parse(reader.ReadLine(), CultureInfo.InvariantCulture);
        TopDocs docs = searcher.Search(q, expectedResults);
        assertEquals(expectedResults, docs.TotalHits);
        for (int i = 0; i < expectedResults; i++)
        {
            String[] scoreDoc = reader.ReadLine().Split(new char[] { ',' }, StringSplitOptions.RemoveEmptyEntries);
            assertEquals(int.Parse(scoreDoc[0], CultureInfo.InvariantCulture), docs.ScoreDocs[i].Doc);
            assertEquals(float.Parse(scoreDoc[1], CultureInfo.InvariantCulture), docs.ScoreDocs[i].Score, epsilon);
        }
    }
    r.Dispose();
    dir.Dispose();
}
public void TestSimple()
{
    const string idField = "id";
    const string toField = "productId";

    Directory dir = NewDirectory();
    RandomIndexWriter w = new RandomIndexWriter(Random(), dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NewLogMergePolicy()));

    // 0
    Document doc = new Document();
    doc.Add(new TextField("description", "random text", Field.Store.NO));
    doc.Add(new TextField("name", "name1", Field.Store.NO));
    doc.Add(new TextField(idField, "1", Field.Store.NO));
    w.AddDocument(doc);

    // 1
    doc = new Document();
    doc.Add(new TextField("price", "10.0", Field.Store.NO));
    doc.Add(new TextField(idField, "2", Field.Store.NO));
    doc.Add(new TextField(toField, "1", Field.Store.NO));
    w.AddDocument(doc);

    // 2
    doc = new Document();
    doc.Add(new TextField("price", "20.0", Field.Store.NO));
    doc.Add(new TextField(idField, "3", Field.Store.NO));
    doc.Add(new TextField(toField, "1", Field.Store.NO));
    w.AddDocument(doc);

    // 3
    doc = new Document();
    doc.Add(new TextField("description", "more random text", Field.Store.NO));
    doc.Add(new TextField("name", "name2", Field.Store.NO));
    doc.Add(new TextField(idField, "4", Field.Store.NO));
    w.AddDocument(doc);
    w.Commit();

    // 4
    doc = new Document();
    doc.Add(new TextField("price", "10.0", Field.Store.NO));
    doc.Add(new TextField(idField, "5", Field.Store.NO));
    doc.Add(new TextField(toField, "4", Field.Store.NO));
    w.AddDocument(doc);

    // 5
    doc = new Document();
    doc.Add(new TextField("price", "20.0", Field.Store.NO));
    doc.Add(new TextField(idField, "6", Field.Store.NO));
    doc.Add(new TextField(toField, "4", Field.Store.NO));
    w.AddDocument(doc);

    IndexSearcher indexSearcher = new IndexSearcher(w.Reader);
    w.Dispose();

    // Search for product
    Query joinQuery = JoinUtil.CreateJoinQuery(idField, false, toField, new TermQuery(new Term("name", "name2")), indexSearcher, ScoreMode.None);
    TopDocs result = indexSearcher.Search(joinQuery, 10);
    assertEquals(2, result.TotalHits);
    assertEquals(4, result.ScoreDocs[0].Doc);
    assertEquals(5, result.ScoreDocs[1].Doc);

    joinQuery = JoinUtil.CreateJoinQuery(idField, false, toField, new TermQuery(new Term("name", "name1")), indexSearcher, ScoreMode.None);
    result = indexSearcher.Search(joinQuery, 10);
    assertEquals(2, result.TotalHits);
    assertEquals(1, result.ScoreDocs[0].Doc);
    assertEquals(2, result.ScoreDocs[1].Doc);

    // Search for offer
    joinQuery = JoinUtil.CreateJoinQuery(toField, false, idField, new TermQuery(new Term("id", "5")), indexSearcher, ScoreMode.None);
    result = indexSearcher.Search(joinQuery, 10);
    assertEquals(1, result.TotalHits);
    assertEquals(3, result.ScoreDocs[0].Doc);

    indexSearcher.IndexReader.Dispose();
    dir.Dispose();
}
/// <summary> /// Highlights the top passages from a single field. /// </summary> /// <param name="field">field name to highlight. Must have a stored string value and also be indexed with offsets.</param> /// <param name="query">query to highlight.</param> /// <param name="searcher">searcher that was previously used to execute the query.</param> /// <param name="topDocs">TopDocs containing the summary result documents to highlight.</param> /// <returns> /// Array of formatted snippets corresponding to the documents in <paramref name="topDocs"/>. /// If no highlights were found for a document, the /// first sentence for the field will be returned. /// </returns> /// <exception cref="IOException">if an I/O error occurred during processing</exception> /// <exception cref="ArgumentException">if <paramref name="field"/> was indexed without <see cref="IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS"/></exception> public virtual string[] Highlight(string field, Query query, IndexSearcher searcher, TopDocs topDocs) { return(Highlight(field, query, searcher, topDocs, 1)); }
private void RecordResultsAlreadySeenForDistinctQuery(IndexSearcher indexSearcher, TopDocs search, int start) { if (fieldsToFetch.IsDistinctQuery == false) { return; } // add results that were already there in previous pages var min = Math.Min(start, search.totalHits); for (int i = 0; i < min; i++) { Document document = indexSearcher.Doc(search.scoreDocs[i].doc); var indexQueryResult = parent.RetrieveDocument(document, fieldsToFetch); alreadyReturned.Add(indexQueryResult.Projection); } }
/// <summary> /// Highlights the top-N passages from a single field. /// </summary> /// <param name="field"> /// field name to highlight. /// Must have a stored string value and also be indexed with offsets. /// </param> /// <param name="query">query to highlight.</param> /// <param name="searcher">searcher that was previously used to execute the query.</param> /// <param name="topDocs">TopDocs containing the summary result documents to highlight.</param> /// <param name="maxPassages">The maximum number of top-N ranked passages used to form the highlighted snippets.</param> /// <returns> /// Array of formatted snippets corresponding to the documents in <paramref name="topDocs"/>. /// If no highlights were found for a document, the /// first <paramref name="maxPassages"/> sentences from the /// field will be returned. /// </returns> /// <exception cref="IOException">if an I/O error occurred during processing</exception> /// <exception cref="ArgumentException">Illegal if <paramref name="field"/> was indexed without <see cref="IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS"/></exception> public virtual string[] Highlight(string field, Query query, IndexSearcher searcher, TopDocs topDocs, int maxPassages) { IDictionary <string, string[]> res = HighlightFields(new string[] { field }, query, searcher, topDocs, new int[] { maxPassages }); string[] result; res.TryGetValue(field, out result); return(result); }
public void TestMinShouldMatch() { Directory dir = NewDirectory(); MockAnalyzer analyzer = new MockAnalyzer(Random); RandomIndexWriter w = new RandomIndexWriter( #if FEATURE_INSTANCE_TESTDATA_INITIALIZATION this, #endif Random, dir, analyzer); string[] docs = new string[] { @"this is the end of the world right", @"is this it or maybe not", @"this is the end of the universe as we know it", @"there is the famous restaurant at the end of the universe" }; for (int i = 0; i < docs.Length; i++) { Document doc = new Document(); doc.Add(NewStringField(@"id", @"" + i, Field.Store.YES)); doc.Add(NewTextField(@"field", docs[i], Field.Store.NO)); w.AddDocument(doc); } IndexReader r = w.GetReader(); IndexSearcher s = NewSearcher(r); { CommonTermsQuery query = new CommonTermsQuery(Occur.SHOULD, Occur.SHOULD, Random.NextBoolean() ? 2.0f : 0.5f); query.Add(new Term("field", "is")); query.Add(new Term("field", "this")); query.Add(new Term("field", "end")); query.Add(new Term("field", "world")); query.Add(new Term("field", "universe")); query.Add(new Term("field", "right")); query.LowFreqMinimumNumberShouldMatch = 0.5f; TopDocs search = s.Search(query, 10); assertEquals(search.TotalHits, 1); assertEquals(@"0", r.Document(search.ScoreDocs[0].Doc).Get(@"id")); } { CommonTermsQuery query = new CommonTermsQuery(Occur.SHOULD, Occur.SHOULD, Random.NextBoolean() ? 2.0f : 0.5f); query.Add(new Term("field", "is")); query.Add(new Term("field", "this")); query.Add(new Term("field", "end")); query.Add(new Term("field", "world")); query.Add(new Term("field", "universe")); query.Add(new Term("field", "right")); query.LowFreqMinimumNumberShouldMatch = 2.0f; TopDocs search = s.Search(query, 10); assertEquals(search.TotalHits, 1); assertEquals(@"0", r.Document(search.ScoreDocs[0].Doc).Get(@"id")); } { CommonTermsQuery query = new CommonTermsQuery(Occur.SHOULD, Occur.SHOULD, Random.NextBoolean() ? 2.0f : 0.5f); query.Add(new Term("field", "is")); query.Add(new Term("field", "this")); query.Add(new Term("field", "end")); query.Add(new Term("field", "world")); query.Add(new Term("field", "universe")); query.Add(new Term("field", "right")); query.LowFreqMinimumNumberShouldMatch = 0.49f; TopDocs search = s.Search(query, 10); assertEquals(search.TotalHits, 3); assertEquals(@"0", r.Document(search.ScoreDocs[0].Doc).Get(@"id")); assertEquals(@"2", r.Document(search.ScoreDocs[1].Doc).Get(@"id")); assertEquals(@"3", r.Document(search.ScoreDocs[2].Doc).Get(@"id")); } { CommonTermsQuery query = new CommonTermsQuery(Occur.SHOULD, Occur.SHOULD, Random.NextBoolean() ? 2.0f : 0.5f); query.Add(new Term("field", "is")); query.Add(new Term("field", "this")); query.Add(new Term("field", "end")); query.Add(new Term("field", "world")); query.Add(new Term("field", "universe")); query.Add(new Term("field", "right")); query.LowFreqMinimumNumberShouldMatch = 1.0f; TopDocs search = s.Search(query, 10); assertEquals(search.TotalHits, 3); assertEquals(@"0", r.Document(search.ScoreDocs[0].Doc).Get(@"id")); assertEquals(@"2", r.Document(search.ScoreDocs[1].Doc).Get(@"id")); assertEquals(@"3", r.Document(search.ScoreDocs[2].Doc).Get(@"id")); assertTrue(search.ScoreDocs[1].Score > search.ScoreDocs[2].Score); } { CommonTermsQuery query = new CommonTermsQuery(Occur.SHOULD, Occur.SHOULD, Random.NextBoolean() ? 
2.0f : 0.5f); query.Add(new Term("field", "is")); query.Add(new Term("field", "this")); query.Add(new Term("field", "end")); query.Add(new Term("field", "world")); query.Add(new Term("field", "universe")); query.Add(new Term("field", "right")); query.LowFreqMinimumNumberShouldMatch = 1.0f; query.HighFreqMinimumNumberShouldMatch = 4.0f; TopDocs search = s.Search(query, 10); assertEquals(search.TotalHits, 3); assertEquals(search.ScoreDocs[1].Score, search.ScoreDocs[2].Score, 0.0f); assertEquals(@"0", r.Document(search.ScoreDocs[0].Doc).Get(@"id")); // doc 2 and 3 only get a score from low freq terms assertEquals( new JCG.HashSet <string> { @"2", @"3" }, new JCG.HashSet <string> { r.Document(search.ScoreDocs[1].Doc).Get(@"id"), r.Document(search.ScoreDocs[2].Doc).Get(@"id") }, aggressive: false); } { // only high freq terms around - check that min should match is applied CommonTermsQuery query = new CommonTermsQuery(Occur.SHOULD, Occur.SHOULD, Random.NextBoolean() ? 2.0f : 0.5f); query.Add(new Term("field", "is")); query.Add(new Term("field", "this")); query.Add(new Term("field", "the")); query.LowFreqMinimumNumberShouldMatch = 1.0f; query.HighFreqMinimumNumberShouldMatch = 2.0f; TopDocs search = s.Search(query, 10); assertEquals(search.TotalHits, 4); } { // only high freq terms around - check that min should match is applied CommonTermsQuery query = new CommonTermsQuery(Occur.MUST, Occur.SHOULD, Random.NextBoolean() ? 2.0f : 0.5f); query.Add(new Term("field", "is")); query.Add(new Term("field", "this")); query.Add(new Term("field", "the")); query.LowFreqMinimumNumberShouldMatch = 1.0f; query.HighFreqMinimumNumberShouldMatch = 2.0f; TopDocs search = s.Search(query, 10); assertEquals(search.TotalHits, 2); assertEquals( new JCG.HashSet <string> { @"0", @"2" }, new JCG.HashSet <string> { r.Document(search.ScoreDocs[0].Doc).Get(@"id"), r.Document(search.ScoreDocs[1].Doc).Get(@"id") }, aggressive: false); } r.Dispose(); w.Dispose(); dir.Dispose(); }
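The knobs the test exercises, in isolation: a hedged sketch of CommonTermsQuery (Lucene.Net.Queries package), given an IndexSearcher s as in the test above.

using Lucene.Net.Index;
using Lucene.Net.Queries;
using Lucene.Net.Search;

// Terms occurring in more than half the documents are treated as high-frequency
// and grouped into a separate (SHOULD) clause instead of dominating scoring.
var query = new CommonTermsQuery(Occur.SHOULD, Occur.SHOULD, 0.5f /* maxTermFrequency */);
query.Add(new Term("field", "is"));    // high-freq in the test corpus
query.Add(new Term("field", "this"));  // high-freq
query.Add(new Term("field", "world")); // low-freq

// Values below 1 are ratios of the clauses; values of 1 and above are absolute counts.
query.LowFreqMinimumNumberShouldMatch = 0.5f;

TopDocs hits = s.Search(query, 10);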
/// <summary> /// Highlights the top passages from multiple fields. /// <para/> /// Conceptually, this behaves as a more efficient form of: /// <code> /// IDictionary<string, string[]> m = new Dictionary<string, string[]>(); /// foreach (string field in fields) /// { /// m[field] = Highlight(field, query, searcher, topDocs); /// } /// return m; /// </code> /// </summary> /// <param name="fields">field names to highlight. Must have a stored string value and also be indexed with offsets.</param> /// <param name="query">query to highlight.</param> /// <param name="searcher">searcher that was previously used to execute the query.</param> /// <param name="topDocs">TopDocs containing the summary result documents to highlight.</param> /// <returns> /// <see cref="T:IDictionary{string, string[]}"/> keyed on field name, containing the array of formatted snippets /// corresponding to the documents in <paramref name="topDocs"/>. /// If no highlights were found for a document, the /// first sentence from the field will be returned. /// </returns> /// <exception cref="IOException">if an I/O error occurred during processing</exception> /// <exception cref="ArgumentException">if <c>field</c> was indexed without <see cref="IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS"/></exception> public virtual IDictionary <string, string[]> HighlightFields(string[] fields, Query query, IndexSearcher searcher, TopDocs topDocs) { int[] maxPassages = new int[fields.Length]; Arrays.Fill(maxPassages, 1); return(HighlightFields(fields, query, searcher, topDocs, maxPassages)); }
public void DoSearching(Query unReWrittenQuery)
{
    searcher = new IndexSearcher(ramDir, true);
    // for any multi-term queries to work (prefix, wildcard, range, fuzzy, etc.)
    // you must use a rewritten query!
    query = unReWrittenQuery.Rewrite(reader);
    Console.WriteLine("Searching for: " + query.ToString(FIELD_NAME));
    hits = searcher.Search(query, null, 1000);
}
/// <summary> /// Highlights the top-N passages from multiple fields. /// <para/> /// Conceptually, this behaves as a more efficient form of: /// <code> /// IDictionary<string, string[]> m = new Dictionary<string, string[]>(); /// foreach (string field in fields) /// { /// m[field] = Highlight(field, query, searcher, topDocs, maxPassages); /// } /// return m; /// </code> /// </summary> /// <param name="fields">field names to highlight. Must have a stored string value and also be indexed with offsets.</param> /// <param name="query">query to highlight.</param> /// <param name="searcher">searcher that was previously used to execute the query.</param> /// <param name="topDocs">TopDocs containing the summary result documents to highlight.</param> /// <param name="maxPassages">The maximum number of top-N ranked passages per-field used to form the highlighted snippets.</param> /// <returns> /// <see cref="T:IDictionary{string, string[]}"/> keyed on field name, containing the array of formatted snippets /// corresponding to the documents in <paramref name="topDocs"/>. /// If no highlights were found for a document, the /// first <paramref name="maxPassages"/> sentences from the /// field will be returned. /// </returns> /// <exception cref="IOException">if an I/O error occurred during processing</exception> /// <exception cref="ArgumentException">if <c>field</c> was indexed without <see cref="IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS"/></exception> public virtual IDictionary <string, string[]> HighlightFields(string[] fields, Query query, IndexSearcher searcher, TopDocs topDocs, int[] maxPassages) { ScoreDoc[] scoreDocs = topDocs.ScoreDocs; int[] docids = new int[scoreDocs.Length]; for (int i = 0; i < docids.Length; i++) { docids[i] = scoreDocs[i].Doc; } return(HighlightFields(fields, query, searcher, docids, maxPassages)); }
public void DoStandardHighlights(Analyzer analyzer, IndexSearcher searcher, TopDocs hits, Query query, IFormatter formatter, bool expandMT) { IFragmenter frag = new SimpleFragmenter(20); for (int i = 0; i < hits.TotalHits; i++) { String text = searcher.Doc(hits.ScoreDocs[i].Doc).Get(HighlighterTest.FIELD_NAME); int maxNumFragmentsRequired = 2; String fragmentSeparator = "..."; IScorer scorer = null; TokenStream tokenStream = analyzer.TokenStream(HighlighterTest.FIELD_NAME, new StringReader(text)); if (Mode == QUERY) { scorer = new QueryScorer(query); } else if (Mode == QUERY_TERM) { scorer = new QueryTermScorer(query); } var highlighter = new Highlighter(formatter, scorer) {TextFragmenter = frag}; String result = highlighter.GetBestFragments(tokenStream, text, maxNumFragmentsRequired, fragmentSeparator); Console.WriteLine("\t" + result); } }
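The same classic-highlighter pattern reduced to a sketch, assuming Lucene.NET 4.8 names (Analyzer.GetTokenStream; the older port above calls TokenStream instead). The "contents" field name is hypothetical, and multi-term queries should be rewritten first, as DoSearching above notes.

using System;
using System.IO;
using Lucene.Net.Analysis;
using Lucene.Net.Search.Highlight;

// Given: analyzer, searcher, query, and hits = searcher.Search(query, n).
var scorer = new QueryScorer(query);
var highlighter = new Highlighter(new SimpleHTMLFormatter("<b>", "</b>"), scorer)
{
    TextFragmenter = new SimpleFragmenter(20) // ~20-char fragments, as in the test
};

foreach (var scoreDoc in hits.ScoreDocs)
{
    string text = searcher.Doc(scoreDoc.Doc).Get("contents");
    TokenStream tokenStream = analyzer.GetTokenStream("contents", new StringReader(text));
    string best = highlighter.GetBestFragments(tokenStream, text, 2, "...");
    Console.WriteLine(best);
}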
/// <summary> /// Rescore an initial first-pass <seealso cref="TopDocs"/>. /// </summary> /// <param name="searcher"> <seealso cref="IndexSearcher"/> used to produce the /// first pass topDocs </param> /// <param name="firstPassTopDocs"> Hits from the first pass /// search. It's very important that these hits were /// produced by the provided searcher; otherwise the doc /// IDs will not match! </param> /// <param name="topN"> How many re-scored hits to return </param> public abstract TopDocs Rescore(IndexSearcher searcher, TopDocs firstPassTopDocs, int topN);
public void TestNumericRangeQuery() { // doesn't currently highlight, but make sure it doesn't cause exception either query = NumericRangeQuery.NewIntRange(NUMERIC_FIELD_NAME, 2, 6, true, true); searcher = new IndexSearcher(ramDir, true); hits = searcher.Search(query, 100); int maxNumFragmentsRequired = 2; QueryScorer scorer = new QueryScorer(query, FIELD_NAME); Highlighter highlighter = new Highlighter(this, scorer); for (int i = 0; i < hits.TotalHits; i++) { String text = searcher.Doc(hits.ScoreDocs[i].Doc).Get(NUMERIC_FIELD_NAME); TokenStream tokenStream = analyzer.TokenStream(FIELD_NAME, new StringReader(text)); highlighter.TextFragmenter = new SimpleFragmenter(40); String result = highlighter.GetBestFragments(tokenStream, text, maxNumFragmentsRequired, "..."); //Console.WriteLine("\t" + result); } }
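For the range query above to match anything, the field has to be indexed as a numeric (trie) field. A sketch in the same Lucene.Net 3.x style the test uses; NUMERIC_FIELD_NAME is the test's constant, and writer/searcher are assumed in scope.

// Indexing side: trie-encoded int field.
var doc = new Document();
doc.Add(new NumericField(NUMERIC_FIELD_NAME).SetIntValue(5));
writer.AddDocument(doc);

// Query side: inclusive range [2, 6] — the document above matches.
var rangeQuery = NumericRangeQuery.NewIntRange(NUMERIC_FIELD_NAME, 2, 6, true, true);
TopDocs hits = searcher.Search(rangeQuery, 100);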
/// <summary> /// Creates result document collection from Lucene documents. /// </summary> /// <param name="searcher">The searcher.</param> /// <param name="topDocs">The hits.</param> private void CreateDocuments(Searcher searcher, TopDocs topDocs) { // if no documents found return if (topDocs == null) return; var entries = new List<ResultDocument>(); // get total hits var totalCount = topDocs.TotalHits; var recordsToRetrieve = Results.SearchCriteria.RecordsToRetrieve; var startIndex = Results.SearchCriteria.StartingRecord; if (recordsToRetrieve > totalCount) recordsToRetrieve = totalCount; for (var index = startIndex; index < startIndex + recordsToRetrieve; index++) { if (index >= totalCount) break; var document = searcher.Doc(topDocs.ScoreDocs[index].Doc); var doc = new ResultDocument(); var documentFields = document.GetFields(); using (var fi = documentFields.GetEnumerator()) { while (fi.MoveNext()) { if (fi.Current != null) { var field = fi.Current; // make sure document field doens't exist, if it does, simply add another value if (doc.ContainsKey(field.Name)) { var existingField = doc[field.Name] as DocumentField; if (existingField != null) existingField.AddValue(field.StringValue); } else // add new { doc.Add(new DocumentField(field.Name, field.StringValue)); } } } } entries.Add(doc); } var searchDocuments = new ResultDocumentSet { Name = "Items", Documents = entries.ToArray(), TotalCount = totalCount }; Results.Documents = new[] { searchDocuments }; }
/// <summary>
/// Splits a given index into three indexes for training, test, and cross-validation tasks respectively.
/// </summary>
/// <param name="originalIndex">an <see cref="AtomicReader"/> on the source index</param>
/// <param name="trainingIndex">a <see cref="Directory"/> used to write the training index</param>
/// <param name="testIndex">a <see cref="Directory"/> used to write the test index</param>
/// <param name="crossValidationIndex">a <see cref="Directory"/> used to write the cross-validation index</param>
/// <param name="analyzer"><see cref="Analyzer"/> used to create the new docs</param>
/// <param name="fieldNames">names of fields that need to be put in the new indexes, or <c>null</c> if all should be used</param>
/// <exception cref="IOException">if any writing operation fails on any of the indexes</exception>
public void Split(AtomicReader originalIndex, Directory trainingIndex, Directory testIndex, Directory crossValidationIndex, Analyzer analyzer, params string[] fieldNames)
{
    // create index writers for the training / test / cross-validation indexes
    IndexWriter testWriter = new IndexWriter(testIndex, new IndexWriterConfig(Util.LuceneVersion.LUCENE_CURRENT, analyzer));
    IndexWriter cvWriter = new IndexWriter(crossValidationIndex, new IndexWriterConfig(Util.LuceneVersion.LUCENE_CURRENT, analyzer));
    IndexWriter trainingWriter = new IndexWriter(trainingIndex, new IndexWriterConfig(Util.LuceneVersion.LUCENE_CURRENT, analyzer));

    try
    {
        int size = originalIndex.MaxDoc;
        IndexSearcher indexSearcher = new IndexSearcher(originalIndex);
        TopDocs topDocs = indexSearcher.Search(new MatchAllDocsQuery(), int.MaxValue);

        // set the type to be indexed and stored, with term vectors
        FieldType ft = new FieldType(TextField.TYPE_STORED);
        ft.StoreTermVectors = true;
        ft.StoreTermVectorOffsets = true;
        ft.StoreTermVectorPositions = true;

        int b = 0;

        // iterate over existing documents
        foreach (ScoreDoc scoreDoc in topDocs.ScoreDocs)
        {
            // create a new document for indexing
            Document doc = new Document();
            if (fieldNames != null && fieldNames.Length > 0)
            {
                foreach (string fieldName in fieldNames)
                {
                    doc.Add(new Field(fieldName, originalIndex.Document(scoreDoc.Doc).GetField(fieldName).ToString(), ft));
                }
            }
            else
            {
                foreach (IndexableField storableField in originalIndex.Document(scoreDoc.Doc).Fields)
                {
                    if (storableField.ReaderValue != null)
                    {
                        doc.Add(new Field(storableField.Name, storableField.ReaderValue, ft));
                    }
                    else if (storableField.BinaryValue != null)
                    {
                        doc.Add(new Field(storableField.Name, storableField.BinaryValue, ft));
                    }
                    else if (storableField.StringValue != null)
                    {
                        doc.Add(new Field(storableField.Name, storableField.StringValue, ft));
                    }
                    else if (storableField.NumericValue != null)
                    {
                        doc.Add(new Field(storableField.Name, storableField.NumericValue.ToString(), ft));
                    }
                }
            }

            // add it to one of the indexes
            if (b % 2 == 0 && testWriter.MaxDoc < size * _testRatio)
            {
                testWriter.AddDocument(doc);
            }
            else if (cvWriter.MaxDoc < size * _crossValidationRatio)
            {
                cvWriter.AddDocument(doc);
            }
            else
            {
                trainingWriter.AddDocument(doc);
            }

            b++;
        }
    }
    catch (Exception e)
    {
        throw new IOException("Exception in DatasetSplitter", e);
    }
    finally
    {
        testWriter.Commit();
        cvWriter.Commit();
        trainingWriter.Commit();

        // close the index writers
        testWriter.Dispose();
        cvWriter.Dispose();
        trainingWriter.Dispose();
    }
}
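Calling the splitter, sketched under the assumption of a double-ratio constructor as in the upstream Lucene classification module; the directory variables and the "body" field name are hypothetical.

// 10% test, 10% cross-validation, remaining ~80% training.
var splitter = new DatasetSplitter(0.1, 0.1);

using (DirectoryReader reader = DirectoryReader.Open(sourceDir))
{
    // Split() wants an AtomicReader view of the (possibly composite) source index.
    AtomicReader atomicReader = SlowCompositeReaderWrapper.Wrap(reader);
    splitter.Split(atomicReader, trainingDir, testDir, crossValidationDir,
                   new StandardAnalyzer(LuceneVersion.LUCENE_48), "body");
}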
public virtual void TestArbitraryFields() { Directory dir = NewDirectory(); RandomIndexWriter w = new RandomIndexWriter( #if FEATURE_INSTANCE_TESTDATA_INITIALIZATION this, #endif Random, dir); int NUM_DOCS = AtLeast(27); if (VERBOSE) { Console.WriteLine("TEST: " + NUM_DOCS + " docs"); } int[] fieldsPerDoc = new int[NUM_DOCS]; int baseCount = 0; for (int docCount = 0; docCount < NUM_DOCS; docCount++) { int fieldCount = TestUtil.NextInt32(Random, 1, 17); fieldsPerDoc[docCount] = fieldCount - 1; int finalDocCount = docCount; if (VERBOSE) { Console.WriteLine("TEST: " + fieldCount + " fields in doc " + docCount); } int finalBaseCount = baseCount; baseCount += fieldCount - 1; w.AddDocument(new IterableAnonymousInnerClassHelper(this, fieldCount, finalDocCount, finalBaseCount)); } IndexReader r = w.GetReader(); w.Dispose(); IndexSearcher s = NewSearcher(r); int counter = 0; for (int id = 0; id < NUM_DOCS; id++) { if (VERBOSE) { Console.WriteLine("TEST: verify doc id=" + id + " (" + fieldsPerDoc[id] + " fields) counter=" + counter); } TopDocs hits = s.Search(new TermQuery(new Term("id", "" + id)), 1); Assert.AreEqual(1, hits.TotalHits); int docID = hits.ScoreDocs[0].Doc; Document doc = s.Doc(docID); int endCounter = counter + fieldsPerDoc[id]; while (counter < endCounter) { string name = "f" + counter; int fieldID = counter % 10; bool stored = (counter & 1) == 0 || fieldID == 3; bool binary = fieldID == 3; bool indexed = fieldID != 3; string stringValue; if (fieldID != 3 && fieldID != 9) { stringValue = "text " + counter; } else { stringValue = null; } // stored: if (stored) { IIndexableField f = doc.GetField(name); Assert.IsNotNull(f, "doc " + id + " doesn't have field f" + counter); if (binary) { Assert.IsNotNull(f, "doc " + id + " doesn't have field f" + counter); BytesRef b = f.GetBinaryValue(); Assert.IsNotNull(b); Assert.AreEqual(10, b.Length); for (int idx = 0; idx < 10; idx++) { Assert.AreEqual((byte)(idx + counter), b.Bytes[b.Offset + idx]); } } else { Debug.Assert(stringValue != null); Assert.AreEqual(stringValue, f.GetStringValue()); } } if (indexed) { bool tv = counter % 2 == 1 && fieldID != 9; if (tv) { Terms tfv = r.GetTermVectors(docID).GetTerms(name); Assert.IsNotNull(tfv); TermsEnum termsEnum = tfv.GetIterator(null); Assert.AreEqual(new BytesRef("" + counter), termsEnum.Next()); Assert.AreEqual(1, termsEnum.TotalTermFreq); DocsAndPositionsEnum dpEnum = termsEnum.DocsAndPositions(null, null); Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS); Assert.AreEqual(1, dpEnum.Freq); Assert.AreEqual(1, dpEnum.NextPosition()); Assert.AreEqual(new BytesRef("text"), termsEnum.Next()); Assert.AreEqual(1, termsEnum.TotalTermFreq); dpEnum = termsEnum.DocsAndPositions(null, dpEnum); Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS); Assert.AreEqual(1, dpEnum.Freq); Assert.AreEqual(0, dpEnum.NextPosition()); Assert.IsNull(termsEnum.Next()); // TODO: offsets } else { Fields vectors = r.GetTermVectors(docID); Assert.IsTrue(vectors == null || vectors.GetTerms(name) == null); } BooleanQuery bq = new BooleanQuery(); bq.Add(new TermQuery(new Term("id", "" + id)), Occur.MUST); bq.Add(new TermQuery(new Term(name, "text")), Occur.MUST); TopDocs hits2 = s.Search(bq, 1); Assert.AreEqual(1, hits2.TotalHits); Assert.AreEqual(docID, hits2.ScoreDocs[0].Doc); bq = new BooleanQuery(); bq.Add(new TermQuery(new Term("id", "" + id)), Occur.MUST); bq.Add(new TermQuery(new Term(name, "" + counter)), Occur.MUST); TopDocs hits3 = s.Search(bq, 1); Assert.AreEqual(1, hits3.TotalHits); 
Assert.AreEqual(docID, hits3.ScoreDocs[0].Doc); } counter++; } } r.Dispose(); dir.Dispose(); }
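Reading a term vector back out, as the assertions above do: a sketch using the same enumerator API as the test (GetIterator / Next returning a BytesRef), given the reader r and a docID; the field name is hypothetical.

// The field must have been indexed with term vectors (FieldType.StoreTermVectors = true).
Fields vectors = r.GetTermVectors(docID);
Terms terms = vectors.GetTerms("f4");
TermsEnum termsEnum = terms.GetIterator(null);
BytesRef term;
while ((term = termsEnum.Next()) != null)
{
    Console.WriteLine(term.Utf8ToString() + " totalTermFreq=" + termsEnum.TotalTermFreq);
}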
private void RetrieveMatches(
    ISearchWrapper wrapper,
    TopDocs hits,
    IList<MatchingDocument> matchList,
    Func<MatchingDocument, bool> doesMatch,
    int first,
    int count)
{
    var scoreDocs = hits.scoreDocs;

    // Skip hits until 'first' matching documents have been passed over;
    // bounds-check before indexing so an empty result set is handled safely.
    int docIndex = 0;
    int matchIndex = 0;
    while (matchIndex < first && docIndex < scoreDocs.Length)
    {
        var doc = wrapper.IndexSearcher.doc(scoreDocs[docIndex].doc);
        ++docIndex;
        if (doesMatch(new MatchingDocument(doc)))
            ++matchIndex;
    }

    // Collect up to 'count' further matches.
    while (matchList.Count < count && docIndex < scoreDocs.Length)
    {
        var doc = wrapper.IndexSearcher.doc(scoreDocs[docIndex].doc);
        ++docIndex;
        var matchingDoc = new MatchingDocument(doc);
        if (doesMatch(matchingDoc))
        {
            matchList.Add(matchingDoc);
        }
    }
}
protected override Result <LightweightHitData> CreateResults(N2.Persistence.Search.Query query, IndexSearcher s, TopDocs hits) { var result = new Result <LightweightHitData>(); result.Total = hits.TotalHits; var resultHits = hits.ScoreDocs.Skip(query.SkipHits).Take(query.TakeHits).Select(hit => { var doc = s.Doc(hit.Doc); int id = int.Parse(doc.Get("ID")); return(new Hit <LightweightHitData> { Content = new LightweightHitData { ID = id, AlteredPermissions = (Security.Permission) int.Parse(doc.Get("AlteredPermissions")), State = (ContentState)int.Parse(doc.Get("State")), Visible = Convert.ToBoolean(doc.Get("Visible")), AuthorizedRoles = doc.Get("Roles").Split(' '), Path = doc.Get("Path") }, Title = doc.Get("Title"), Url = doc.Get("Url"), Score = hit.Score }); }).ToList(); result.Hits = resultHits; result.Count = resultHits.Count; return(result); }
public virtual void TestRollingUpdates_Mem() { Random random = new Random(Random.Next()); BaseDirectoryWrapper dir = NewDirectory(); LineFileDocs docs = new LineFileDocs(random, DefaultCodecSupportsDocValues); //provider.register(new MemoryCodec()); if ((!"Lucene3x".Equals(Codec.Default.Name, StringComparison.Ordinal)) && LuceneTestCase.Random.NextBoolean()) { Codec.Default = TestUtil.AlwaysPostingsFormat(new MemoryPostingsFormat(LuceneTestCase.Random.nextBoolean(), random.NextSingle())); } MockAnalyzer analyzer = new MockAnalyzer(LuceneTestCase.Random); analyzer.MaxTokenLength = TestUtil.NextInt32(LuceneTestCase.Random, 1, IndexWriter.MAX_TERM_LENGTH); IndexWriter w = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer)); int SIZE = AtLeast(20); int id = 0; IndexReader r = null; IndexSearcher s = null; int numUpdates = (int)(SIZE * (2 + (TEST_NIGHTLY ? 200 * LuceneTestCase.Random.NextDouble() : 5 * LuceneTestCase.Random.NextDouble()))); if (VERBOSE) { Console.WriteLine("TEST: numUpdates=" + numUpdates); } int updateCount = 0; // TODO: sometimes update ids not in order... for (int docIter = 0; docIter < numUpdates; docIter++) { Documents.Document doc = docs.NextDoc(); string myID = "" + id; if (id == SIZE - 1) { id = 0; } else { id++; } if (VERBOSE) { Console.WriteLine(" docIter=" + docIter + " id=" + id); } ((Field)doc.GetField("docid")).SetStringValue(myID); Term idTerm = new Term("docid", myID); bool doUpdate; if (s != null && updateCount < SIZE) { TopDocs hits = s.Search(new TermQuery(idTerm), 1); Assert.AreEqual(1, hits.TotalHits); doUpdate = !w.TryDeleteDocument(r, hits.ScoreDocs[0].Doc); if (VERBOSE) { if (doUpdate) { Console.WriteLine(" tryDeleteDocument failed"); } else { Console.WriteLine(" tryDeleteDocument succeeded"); } } } else { doUpdate = true; if (VERBOSE) { Console.WriteLine(" no searcher: doUpdate=true"); } } updateCount++; if (doUpdate) { w.UpdateDocument(idTerm, doc); } else { w.AddDocument(doc); } if (docIter >= SIZE && LuceneTestCase.Random.Next(50) == 17) { if (r != null) { r.Dispose(); } bool applyDeletions = LuceneTestCase.Random.NextBoolean(); if (VERBOSE) { Console.WriteLine("TEST: reopen applyDeletions=" + applyDeletions); } r = w.GetReader(applyDeletions); if (applyDeletions) { s = NewSearcher(r); } else { s = null; } Assert.IsTrue(!applyDeletions || r.NumDocs == SIZE, "applyDeletions=" + applyDeletions + " r.NumDocs=" + r.NumDocs + " vs SIZE=" + SIZE); updateCount = 0; } } if (r != null) { r.Dispose(); } w.Commit(); Assert.AreEqual(SIZE, w.NumDocs); w.Dispose(); TestIndexWriter.AssertNoUnreferencedFiles(dir, "leftover files after rolling updates"); docs.Dispose(); // LUCENE-4455: SegmentInfos infos = new SegmentInfos(); infos.Read(dir); long totalBytes = 0; foreach (SegmentCommitInfo sipc in infos.Segments) { totalBytes += sipc.GetSizeInBytes(); } long totalBytes2 = 0; foreach (string fileName in dir.ListAll()) { if (!fileName.StartsWith(IndexFileNames.SEGMENTS, StringComparison.Ordinal)) { totalBytes2 += dir.FileLength(fileName); } } Assert.AreEqual(totalBytes2, totalBytes); dir.Dispose(); }
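The update-or-add core of the test, in isolation: UpdateDocument deletes by term and adds the new version atomically, and a near-real-time reader makes the change searchable. A sketch using the same w / myID / doc names as the test.

// Replace whatever document(s) carry this id, then add the new version atomically.
var idTerm = new Term("docid", myID);
w.UpdateDocument(idTerm, doc);

// Reopen a near-real-time reader (true = apply deletes) to see the change,
// mirroring the w.GetReader(applyDeletions) call in the test.
IndexReader reader = w.GetReader(true);
IndexSearcher searcher = NewSearcher(reader);
TopDocs hit = searcher.Search(new TermQuery(idTerm), 1);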
private static IList <SearchResult> RetrieveSearchResults(IndexSearcher indexSearcher, TopDocs hits) { //iterate over the results. var results = hits.ScoreDocs.AsQueryable() .OrderByDescending(hit => hit.Score) .ToList() .Select(hit => { var jsonDocumentField = indexSearcher.Doc(hit.Doc).GetField(Constants.JsonDocumentFieldName); var fieldDoc = (hit as FieldDoc); if (fieldDoc != null && Double.IsNaN(hit.Score) && fieldDoc.fields.Length > 0 && fieldDoc.fields[0] is float) { hit.Score = (float)fieldDoc.fields[0]; //TODO: is this really true? } if (jsonDocumentField == null) { return new SearchResult { Score = hit.Score, LuceneDocId = hit.Doc, Document = null } } ; return(new SearchResult { Score = hit.Score, LuceneDocId = hit.Doc, Document = JsonConvert.DeserializeObject <JsonDocumentDto>(jsonDocumentField.StringValue) }); }) .ToList(); return(results); }