Exemple #1
0
        /// <summary>
        /// Finds documents similar to a base document in the index named by <paramref name="query"/>.
        /// The base document is located in the index by its document id and/or by the map-group field
        /// values, a <c>RavenMoreLikeThis</c> query is generated from it, and the hits are returned
        /// together with the requested includes and an etag hashed from every etag that contributed.
        /// </summary>
        /// <param name="query">Describes the index, the base document and the optional stop words, fields, additional query and includes.</param>
        /// <param name="transactionInformation">Handed to <c>AddIncludesCommand</c> unchanged.</param>
        /// <param name="pageSize">Number of top-scoring hits the collector keeps.</param>
        /// <returns>The results and includes, plus an etag computed from the document etags, the index etag and the etags of included documents.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="query"/> is null.</exception>
        /// <exception cref="InvalidOperationException">
        /// The index does not exist, neither a document id nor map-group fields were supplied, the base
        /// document is not present in the index, or the stop words document does not exist.
        /// </exception>
        public MoreLikeThisQueryResult ExecuteMoreLikeThisQuery(MoreLikeThisQuery query, TransactionInformation transactionInformation, int pageSize = 25)
        {
            if (query == null)
            {
                throw new ArgumentNullException("query");
            }

            var index = database.IndexStorage.GetIndexInstance(query.IndexName);

            if (index == null)
            {
                throw new InvalidOperationException("The index " + query.IndexName + " cannot be found");
            }

            if (string.IsNullOrEmpty(query.DocumentId) && query.MapGroupFields.Count == 0)
            {
                throw new InvalidOperationException("The document id or map group fields are mandatory");
            }

            IndexSearcher searcher;

            using (database.IndexStorage.GetCurrentIndexSearcher(index.indexId, out searcher))
            {
                // Build the lookup query for the base document: every supplied criterion must match.
                var documentQuery = new BooleanQuery();

                if (string.IsNullOrEmpty(query.DocumentId) == false)
                {
                    documentQuery.Add(new TermQuery(new Term(Constants.DocumentIdFieldName, query.DocumentId.ToLowerInvariant())), Occur.MUST);
                }

                foreach (string key in query.MapGroupFields.Keys)
                {
                    documentQuery.Add(new TermQuery(new Term(key, query.MapGroupFields[key])), Occur.MUST);
                }

                // Only the first match is needed: it supplies the Lucene doc id of the base document.
                var td = searcher.Search(documentQuery, 1);

                if (td.ScoreDocs.Length == 0)
                {
                    throw new InvalidOperationException("Document " + query.DocumentId + " could not be found");
                }

                var ir  = searcher.IndexReader;
                var mlt = new RavenMoreLikeThis(ir);

                AssignParameters(mlt, query);

                if (string.IsNullOrWhiteSpace(query.StopWordsDocumentId) == false)
                {
                    var stopWordsDoc = database.Documents.Get(query.StopWordsDocumentId, null);
                    if (stopWordsDoc == null)
                    {
                        throw new InvalidOperationException("Stop words document " + query.StopWordsDocumentId + " could not be found");
                    }

                    var stopWordsSetup = stopWordsDoc.DataAsJson.JsonDeserialization<StopWordsSetup>();
                    if (stopWordsSetup.StopWords != null)
                    {
                        var stopWords = stopWordsSetup.StopWords;
                        var ht        = new HashSet<string>(StringComparer.InvariantCultureIgnoreCase);
                        foreach (var stopWord in stopWords)
                        {
                            ht.Add(stopWord);
                        }
                        mlt.SetStopWords(ht);
                    }
                }

                // An empty field list is treated like a missing one, so the query falls back to the
                // index's own field names instead of comparing on no fields at all.
                var fieldNames = query.Fields != null && query.Fields.Length > 0
                    ? query.Fields
                    : GetFieldNames(ir);
                mlt.SetFieldNames(fieldNames);

                var toDispose = new List<Action>();
                RavenPerFieldAnalyzerWrapper perFieldAnalyzerWrapper = null;
                try
                {
                    perFieldAnalyzerWrapper = index.CreateAnalyzer(new LowerCaseKeywordAnalyzer(), toDispose, true);
                    mlt.Analyzer            = perFieldAnalyzerWrapper;

                    var mltQuery = mlt.Like(td.ScoreDocs[0].Doc);
                    var tsdc     = TopScoreDocCollector.Create(pageSize, true);

                    // When an additional query is given, a hit must satisfy both it and the similarity query.
                    if (string.IsNullOrWhiteSpace(query.AdditionalQuery) == false)
                    {
                        var additionalQuery = QueryBuilder.BuildQuery(query.AdditionalQuery, perFieldAnalyzerWrapper);
                        mltQuery = new BooleanQuery
                        {
                            { mltQuery, Occur.MUST },
                            { additionalQuery, Occur.MUST },
                        };
                    }

                    searcher.Search(mltQuery, tsdc);
                    var hits          = tsdc.TopDocs().ScoreDocs;
                    var jsonDocuments = GetJsonDocuments(query, searcher, index, query.IndexName, hits, td.ScoreDocs[0].Doc);

                    var result = new MultiLoadResult();

                    // Collect the raw bytes of every etag that influences the response; they are hashed below.
                    var includedEtags = new List<byte>(jsonDocuments.SelectMany(x => x.Etag.ToByteArray()));
                    includedEtags.AddRange(database.Indexes.GetIndexEtag(query.IndexName, null).ToByteArray());
                    var loadedIds          = new HashSet<string>(jsonDocuments.Select(x => x.Key));
                    var addIncludesCommand = new AddIncludesCommand(database, transactionInformation, (etag, includedDoc) =>
                    {
                        includedEtags.AddRange(etag.ToByteArray());
                        result.Includes.Add(includedDoc);
                    }, query.Includes ?? new string[0], loadedIds);

                    // idsToLoad and documentRetriever are not declared in this method; they are members
                    // assigned here and read again after the batch completes.
                    idsToLoad = new HashSet<string>();

                    database.TransactionalStorage.Batch(actions =>
                    {
                        documentRetriever = new DocumentRetriever(database.Configuration, actions, database.ReadTriggers, query.TransformerParameters, idsToLoad);

                        using (new CurrentTransformationScope(database, documentRetriever))
                        {
                            foreach (var document in ProcessResults(query, jsonDocuments, database.WorkContext.CancellationToken))
                            {
                                result.Results.Add(document);
                                addIncludesCommand.Execute(document);
                            }
                        }
                    });

                    addIncludesCommand.AlsoInclude(idsToLoad);

                    var  computeHash  = Encryptor.Current.Hash.Compute16(includedEtags.ToArray());
                    Etag computedEtag = Etag.Parse(computeHash);

                    return new MoreLikeThisQueryResult
                    {
                        Etag = computedEtag,
                        Result = result,
                    };
                }
                finally
                {
                    // Nested finally: the queued dispose callbacks must run even if closing the analyzer throws.
                    try
                    {
                        if (perFieldAnalyzerWrapper != null)
                        {
                            perFieldAnalyzerWrapper.Close();
                        }
                    }
                    finally
                    {
                        foreach (var action in toDispose)
                        {
                            action();
                        }
                    }
                }
            }
        }
        /// <summary>
        /// Lazily yields documents similar to a base document. The base is either a document already in
        /// the index (found by document id and/or map-group fields) or, when neither is supplied, the
        /// JSON in <c>query.Document</c>. This is an iterator: nothing runs until the result is enumerated.
        /// </summary>
        /// <param name="query">Describes the base document, the fields to compare, the page size and the optional filter.</param>
        /// <param name="stopWords">Words to ignore when building the similarity query; skipped when null.</param>
        /// <param name="createRetriever">Factory for the retriever that materializes each hit; it is invoked with null.</param>
        /// <param name="context">Used to parse <c>query.Document</c> and to build the filter query.</param>
        /// <param name="getSpatialField">Passed through to <c>QueryBuilder.BuildQuery</c>.</param>
        /// <param name="token">Checked before the similarity search and before each hit is processed.</param>
        /// <returns>The similar documents, excluding the base document and repeated ids.</returns>
        /// <exception cref="InvalidOperationException">The base document lookup returned no hit.</exception>
        /// <exception cref="OperationCanceledException"><paramref name="token"/> was cancelled during enumeration.</exception>
        public IEnumerable<Document> MoreLikeThis(
            MoreLikeThisQueryServerSide query,
            HashSet<string> stopWords,
            Func<SelectField[], IQueryResultRetriever> createRetriever,
            JsonOperationContext context,
            Func<string, SpatialField> getSpatialField, CancellationToken token)
        {
            // Stays null when the base document is supplied inline as JSON instead of living in the index.
            int? baseDocId = null;

            if (string.IsNullOrWhiteSpace(query.DocumentId) == false || query.MapGroupFields.Count > 0)
            {
                var documentQuery = new BooleanQuery();

                if (string.IsNullOrWhiteSpace(query.DocumentId) == false)
                {
                    documentQuery.Add(new TermQuery(new Term(Constants.Documents.Indexing.Fields.DocumentIdFieldName, query.DocumentId.ToLowerInvariant())), Occur.MUST);
                }

                foreach (var key in query.MapGroupFields.Keys)
                {
                    documentQuery.Add(new TermQuery(new Term(key, query.MapGroupFields[key])), Occur.MUST);
                }

                // Only the first match is needed: it supplies the Lucene doc id of the base document.
                var td = _searcher.Search(documentQuery, 1, _state);

                if (td.ScoreDocs.Length == 0)
                {
                    throw new InvalidOperationException("Document " + query.DocumentId + " could not be found");
                }

                baseDocId = td.ScoreDocs[0].Doc;
            }

            var ir  = _searcher.IndexReader;
            var mlt = new RavenMoreLikeThis(ir, query, _state);

            AssignParameters(mlt, query);

            if (stopWords != null)
            {
                mlt.SetStopWords(stopWords);
            }

            // Without explicit fields, compare on every indexed field except the id / reduce-key bookkeeping fields.
            string[] fieldNames;
            if (query.Fields != null && query.Fields.Length > 0)
            {
                fieldNames = query.Fields;
            }
            else
            {
                fieldNames = ir.GetFieldNames(IndexReader.FieldOption.INDEXED)
                             .Where(x => x != Constants.Documents.Indexing.Fields.DocumentIdFieldName && x != Constants.Documents.Indexing.Fields.ReduceKeyHashFieldName)
                             .ToArray();
            }

            mlt.SetFieldNames(fieldNames);
            mlt.Analyzer = _analyzer;

            var pageSize = GetPageSize(_searcher, query.PageSize);

            Query mltQuery;

            if (baseDocId.HasValue)
            {
                mltQuery = mlt.Like(baseDocId.Value);
            }
            else
            {
                using (var blittableJson = ParseJsonStringIntoBlittable(query.Document, context))
                {
                    mltQuery = mlt.Like(blittableJson);
                }
            }

            var tsdc = TopScoreDocCollector.Create(pageSize, true);

            // When the query has where-fields, a hit must satisfy both the filter and the similarity query.
            if (query.Metadata.WhereFields.Count > 0)
            {
                var additionalQuery = QueryBuilder.BuildQuery(context, query.Metadata, query.Metadata.Query.Where, null, _analyzer, getSpatialField);

                mltQuery = new BooleanQuery
                {
                    { mltQuery, Occur.MUST },
                    { additionalQuery, Occur.MUST }
                };
            }

            // Honor cancellation before the index search runs.
            token.ThrowIfCancellationRequested();

            _searcher.Search(mltQuery, tsdc, _state);
            var hits = tsdc.TopDocs().ScoreDocs;

            // Ids already yielded, so the same document is never returned twice.
            var ids = new HashSet<string>(StringComparer.OrdinalIgnoreCase);

            var retriever = createRetriever(null);

            foreach (var hit in hits)
            {
                // The caller may cancel between yielded documents.
                token.ThrowIfCancellationRequested();

                // Never return the base document itself (no-op when baseDocId is null).
                if (hit.Doc == baseDocId)
                {
                    continue;
                }

                var doc = _searcher.Doc(hit.Doc, _state);
                var id  = doc.Get(Constants.Documents.Indexing.Fields.DocumentIdFieldName, _state) ?? doc.Get(Constants.Documents.Indexing.Fields.ReduceKeyHashFieldName, _state);
                if (id == null)
                {
                    continue;
                }

                if (ids.Add(id) == false)
                {
                    continue;
                }

                yield return retriever.Get(doc, hit.Score, _state);
            }
        }
        /// <summary>
        /// Finds documents similar to a base document in the index named by <paramref name="query"/>.
        /// The base document is located in the index by its document id and/or by the map-group field
        /// values, a <c>RavenMoreLikeThis</c> query is generated from it, and the hits are returned
        /// together with the requested includes and an etag hashed from every etag that contributed.
        /// </summary>
        /// <param name="query">Describes the index, the base document and the optional stop words and fields.</param>
        /// <param name="transactionInformation">Handed to <c>AddIncludesCommand</c> unchanged.</param>
        /// <param name="pageSize">Number of top-scoring hits the collector keeps.</param>
        /// <param name="include">Include paths given to <c>AddIncludesCommand</c>; null is treated as an empty array.</param>
        /// <returns>The results and includes, plus an etag built from the MD5 hash of the document etags, the index etag and the etags of included documents.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="query"/> is null.</exception>
        /// <exception cref="InvalidOperationException">
        /// The index does not exist, neither a document id nor map-group fields were supplied, the base
        /// document is not present in the index, or the stop words document does not exist.
        /// </exception>
        public MoreLikeThisQueryResult ExecuteMoreLikeThisQuery(MoreLikeThisQuery query, TransactionInformation transactionInformation, int pageSize = 25, string[] include = null)
        {
            if (query == null)
            {
                throw new ArgumentNullException("query");
            }

            var index = database.IndexStorage.GetIndexInstance(query.IndexName);

            if (index == null)
            {
                throw new InvalidOperationException("The index " + query.IndexName + " cannot be found");
            }

            if (string.IsNullOrEmpty(query.DocumentId) && query.MapGroupFields.Count == 0)
            {
                throw new InvalidOperationException("The document id or map group fields are mandatory");
            }

            IndexSearcher searcher;

            using (database.IndexStorage.GetCurrentIndexSearcher(query.IndexName, out searcher))
            {
                // Build the lookup query for the base document: every supplied criterion must match.
                var documentQuery = new BooleanQuery();

                if (string.IsNullOrEmpty(query.DocumentId) == false)
                {
                    documentQuery.Add(new TermQuery(new Term(Constants.DocumentIdFieldName, query.DocumentId.ToLowerInvariant())), Occur.MUST);
                }

                foreach (string key in query.MapGroupFields.Keys)
                {
                    documentQuery.Add(new TermQuery(new Term(key, query.MapGroupFields[key])), Occur.MUST);
                }

                // Only the first match is needed: it supplies the Lucene doc id of the base document.
                var td = searcher.Search(documentQuery, 1);

                if (td.ScoreDocs.Length == 0)
                {
                    throw new InvalidOperationException("Document " + query.DocumentId + " could not be found");
                }

                var ir  = searcher.IndexReader;
                var mlt = new RavenMoreLikeThis(ir);

                AssignParameters(mlt, query);

                if (string.IsNullOrWhiteSpace(query.StopWordsDocumentId) == false)
                {
                    var stopWordsDoc = database.Get(query.StopWordsDocumentId, null);
                    if (stopWordsDoc == null)
                    {
                        throw new InvalidOperationException("Stop words document " + query.StopWordsDocumentId + " could not be found");
                    }

                    var stopWordsSetup = stopWordsDoc.DataAsJson.JsonDeserialization<StopWordsSetup>();
                    if (stopWordsSetup.StopWords != null)
                    {
                        var stopWords = stopWordsSetup.StopWords;
                        // This version hands the stop words over as a Hashtable keyed (case-insensitively) by the word.
                        var ht        = new Hashtable(StringComparer.InvariantCultureIgnoreCase);
                        foreach (var stopWord in stopWords)
                        {
                            ht[stopWord] = stopWord;
                        }
                        mlt.SetStopWords(ht);
                    }
                }

                // An empty field list is treated like a missing one, so the query falls back to the
                // index's own field names instead of comparing on no fields at all.
                var fieldNames = query.Fields != null && query.Fields.Length > 0
                    ? query.Fields
                    : GetFieldNames(ir);
                mlt.SetFieldNames(fieldNames);

                var toDispose = new List<Action>();
                RavenPerFieldAnalyzerWrapper perFieldAnalyzerWrapper = null;
                try
                {
                    perFieldAnalyzerWrapper = index.CreateAnalyzer(new LowerCaseKeywordAnalyzer(), toDispose, true);
                    mlt.SetAnalyzer(perFieldAnalyzerWrapper);

                    var mltQuery = mlt.Like(td.ScoreDocs[0].Doc);
                    var tsdc     = TopScoreDocCollector.Create(pageSize, true);
                    searcher.Search(mltQuery, tsdc);
                    var hits          = tsdc.TopDocs().ScoreDocs;
                    var jsonDocuments = GetJsonDocuments(query, searcher, index, query.IndexName, hits, td.ScoreDocs[0].Doc);

                    var result = new MultiLoadResult();

                    // Collect the raw bytes of every etag that influences the response; they are hashed below.
                    var includedEtags = new List<byte>(jsonDocuments.SelectMany(x => x.Etag.Value.ToByteArray()));
                    includedEtags.AddRange(database.GetIndexEtag(query.IndexName, null).ToByteArray());
                    var loadedIds          = new HashSet<string>(jsonDocuments.Select(x => x.Key));
                    var addIncludesCommand = new AddIncludesCommand(database, transactionInformation, (etag, includedDoc) =>
                    {
                        includedEtags.AddRange(etag.ToByteArray());
                        result.Includes.Add(includedDoc);
                    }, include ?? new string[0], loadedIds);

                    foreach (var jsonDocument in jsonDocuments)
                    {
                        result.Results.Add(jsonDocument.ToJson());
                        addIncludesCommand.Execute(jsonDocument.DataAsJson);
                    }

                    // The 16-byte MD5 digest of the collected etag bytes becomes the Guid etag of the response.
                    Guid computedEtag;
                    using (var md5 = MD5.Create())
                    {
                        var computeHash = md5.ComputeHash(includedEtags.ToArray());
                        computedEtag = new Guid(computeHash);
                    }

                    return new MoreLikeThisQueryResult
                    {
                        Etag = computedEtag,
                        Result = result,
                    };
                }
                finally
                {
                    // Nested finally: the queued dispose callbacks must run even if closing the analyzer throws.
                    try
                    {
                        if (perFieldAnalyzerWrapper != null)
                        {
                            perFieldAnalyzerWrapper.Close();
                        }
                    }
                    finally
                    {
                        foreach (var action in toDispose)
                        {
                            action();
                        }
                    }
                }
            }
        }
        /// <summary>
        /// Lazily yields documents similar to a base document described by <paramref name="query"/>.
        /// The query is first translated by <c>QueryBuilder.BuildMoreLikeThisQuery</c>; the base is either
        /// the first hit of the resulting base-document query or the inline base document JSON.
        /// This is an iterator: nothing runs until the result is enumerated.
        /// </summary>
        /// <param name="query">The server-side index query whose metadata, where-clause and parameters define the request.</param>
        /// <param name="retriever">Materializes each Lucene hit into a <c>Document</c>.</param>
        /// <param name="context">Used to reach the database, load the stop words document and parse the base document JSON.</param>
        /// <param name="token">Checked before the similarity search and before each hit is processed.</param>
        /// <returns>The similar documents, excluding the base document and repeated ids.</returns>
        /// <exception cref="InvalidOperationException">
        /// The stop words document does not exist, or the base-document query returned no hit.
        /// </exception>
        /// <exception cref="OperationCanceledException"><paramref name="token"/> was cancelled during enumeration.</exception>
        public IEnumerable<Document> MoreLikeThis(
            IndexQueryServerSide query,
            IQueryResultRetriever retriever,
            DocumentsOperationContext context,
            CancellationToken token)
        {
            IDisposable releaseServerContext          = null;
            IDisposable closeServerTransaction        = null;
            TransactionOperationContext serverContext = null;
            MoreLikeThisQuery           moreLikeThisQuery;

            try
            {
                // A server-wide context and read transaction are opened only when the query metadata
                // reports HasCmpXchg; otherwise serverContext stays null.
                if (query.Metadata.HasCmpXchg)
                {
                    releaseServerContext   = context.DocumentDatabase.ServerStore.ContextPool.AllocateOperationContext(out serverContext);
                    closeServerTransaction = serverContext.OpenReadTransaction();
                }

                // using tolerates a null resource, so this also covers the case without a server transaction.
                using (closeServerTransaction)
                {
                    moreLikeThisQuery = QueryBuilder.BuildMoreLikeThisQuery(serverContext, context, query.Metadata, query.Metadata.Query.Where, query.QueryParameters, _analyzer, _queryBuilderFactories);
                }
            }
            finally
            {
                releaseServerContext?.Dispose();
            }

            var options = moreLikeThisQuery.Options != null ? JsonDeserializationServer.MoreLikeThisOptions(moreLikeThisQuery.Options) : MoreLikeThisOptions.Default;

            HashSet<string> stopWords = null;

            if (string.IsNullOrWhiteSpace(options.StopWordsDocumentId) == false)
            {
                var stopWordsDoc = context.DocumentDatabase.DocumentsStorage.Get(context, options.StopWordsDocumentId);
                if (stopWordsDoc == null)
                {
                    throw new InvalidOperationException($"Stop words document {options.StopWordsDocumentId} could not be found");
                }

                if (stopWordsDoc.Data.TryGet(nameof(MoreLikeThisStopWords.StopWords), out BlittableJsonReaderArray value) && value != null)
                {
                    stopWords = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
                    for (var i = 0; i < value.Length; i++)
                    {
                        stopWords.Add(value.GetStringByIndex(i));
                    }
                }
            }

            var ir  = _searcher.IndexReader;
            var mlt = new RavenMoreLikeThis(ir, options, _state);

            // Stays null when the base document is supplied inline instead of being looked up in the index.
            int? baseDocId = null;

            if (moreLikeThisQuery.BaseDocument == null)
            {
                // Only the first match is needed: it supplies the Lucene doc id of the base document.
                var td = _searcher.Search(moreLikeThisQuery.BaseDocumentQuery, 1, _state);

                if (td.ScoreDocs.Length == 0)
                {
                    throw new InvalidOperationException("Given filtering expression did not yield any documents that could be used as a base of comparison");
                }

                baseDocId = td.ScoreDocs[0].Doc;
            }

            if (stopWords != null)
            {
                mlt.SetStopWords(stopWords);
            }

            // Without explicit fields, compare on every indexed field except the id / reduce-key bookkeeping fields.
            string[] fieldNames;
            if (options.Fields != null && options.Fields.Length > 0)
            {
                fieldNames = options.Fields;
            }
            else
            {
                fieldNames = ir.GetFieldNames(IndexReader.FieldOption.INDEXED)
                             .Where(x => x != Constants.Documents.Indexing.Fields.DocumentIdFieldName && x != Constants.Documents.Indexing.Fields.ReduceKeyHashFieldName)
                             .ToArray();
            }

            mlt.SetFieldNames(fieldNames);
            mlt.Analyzer = _analyzer;

            var pageSize = GetPageSize(_searcher, query.PageSize);

            Query mltQuery;

            if (baseDocId.HasValue)
            {
                mltQuery = mlt.Like(baseDocId.Value);
            }
            else
            {
                using (var blittableJson = ParseJsonStringIntoBlittable(moreLikeThisQuery.BaseDocument, context))
                {
                    mltQuery = mlt.Like(blittableJson);
                }
            }

            var tsdc = TopScoreDocCollector.Create(pageSize, true);

            // A match-all filter adds nothing, so it is only combined when it actually restricts the hits.
            if (moreLikeThisQuery.FilterQuery != null && moreLikeThisQuery.FilterQuery is MatchAllDocsQuery == false)
            {
                mltQuery = new BooleanQuery
                {
                    { mltQuery, Occur.MUST },
                    { moreLikeThisQuery.FilterQuery, Occur.MUST }
                };
            }

            // Honor cancellation before the index search runs.
            token.ThrowIfCancellationRequested();

            _searcher.Search(mltQuery, tsdc, _state);
            var hits = tsdc.TopDocs().ScoreDocs;

            // Ids already yielded, so the same document is never returned twice.
            var ids = new HashSet<string>(StringComparer.OrdinalIgnoreCase);

            foreach (var hit in hits)
            {
                // The caller may cancel between yielded documents.
                token.ThrowIfCancellationRequested();

                // Never return the base document itself (no-op when baseDocId is null).
                if (hit.Doc == baseDocId)
                {
                    continue;
                }

                var doc = _searcher.Doc(hit.Doc, _state);
                var id  = doc.Get(Constants.Documents.Indexing.Fields.DocumentIdFieldName, _state) ?? doc.Get(Constants.Documents.Indexing.Fields.ReduceKeyHashFieldName, _state);
                if (id == null)
                {
                    continue;
                }

                if (ids.Add(id) == false)
                {
                    continue;
                }

                yield return retriever.Get(doc, hit.Score, _state);
            }
        }
Exemple #5
0
        /// <summary>
        /// Lazily yields documents similar to a base document that already lives in the index. The base
        /// is found by document id and/or map-group fields. This is an iterator: nothing runs until the
        /// result is enumerated.
        /// </summary>
        /// <param name="query">Describes the base document, the fields to compare, the page size and the optional additional query.</param>
        /// <param name="stopWords">Words to ignore when building the similarity query; skipped when null.</param>
        /// <param name="createRetriever">
        /// Factory for the retriever that materializes each hit. It receives the base document's field
        /// names when no document id was given, and null otherwise.
        /// </param>
        /// <param name="token">Checked before the similarity search and before each hit is processed.</param>
        /// <returns>The similar documents, excluding the base document and repeated ids.</returns>
        /// <exception cref="InvalidOperationException">The base document lookup returned no hit.</exception>
        /// <exception cref="OperationCanceledException"><paramref name="token"/> was cancelled during enumeration.</exception>
        public IEnumerable<Document> MoreLikeThis(MoreLikeThisQueryServerSide query, HashSet<string> stopWords, Func<string[], IQueryResultRetriever> createRetriever, CancellationToken token)
        {
            // Build the lookup query for the base document: every supplied criterion must match.
            var documentQuery = new BooleanQuery();

            if (string.IsNullOrWhiteSpace(query.DocumentId) == false)
            {
                documentQuery.Add(new TermQuery(new Term(Constants.Indexing.Fields.DocumentIdFieldName, query.DocumentId.ToLowerInvariant())), Occur.MUST);
            }

            foreach (var key in query.MapGroupFields.Keys)
            {
                documentQuery.Add(new TermQuery(new Term(key, query.MapGroupFields[key])), Occur.MUST);
            }

            // Only the first match is needed: it supplies the Lucene doc id of the base document.
            var td = _searcher.Search(documentQuery, 1);

            if (td.ScoreDocs.Length == 0)
            {
                throw new InvalidOperationException("Document " + query.DocumentId + " could not be found");
            }

            var ir  = _searcher.IndexReader;
            var mlt = new RavenMoreLikeThis(ir, query);

            if (stopWords != null)
            {
                mlt.SetStopWords(stopWords);
            }

            // An empty field list is treated like a missing one: compare on every indexed field except
            // the id / reduce-key bookkeeping fields instead of comparing on no fields at all.
            var fieldNames = query.Fields != null && query.Fields.Length > 0
                ? query.Fields
                : ir.GetFieldNames(IndexReader.FieldOption.INDEXED)
                    .Where(x => x != Constants.Indexing.Fields.DocumentIdFieldName && x != Constants.Indexing.Fields.ReduceKeyFieldName)
                    .ToArray();

            mlt.SetFieldNames(fieldNames);
            mlt.Analyzer = _analyzer;

            var mltQuery = mlt.Like(td.ScoreDocs[0].Doc);
            var tsdc     = TopScoreDocCollector.Create(query.PageSize, true);

            // When an additional query is given, a hit must satisfy both it and the similarity query.
            if (string.IsNullOrWhiteSpace(query.AdditionalQuery) == false)
            {
                var additionalQuery = QueryBuilder.BuildQuery(query.AdditionalQuery, _analyzer);
                mltQuery = new BooleanQuery
                {
                    { mltQuery, Occur.MUST },
                    { additionalQuery, Occur.MUST },
                };
            }

            // Honor cancellation before the index search runs.
            token.ThrowIfCancellationRequested();

            _searcher.Search(mltQuery, tsdc);
            var hits      = tsdc.TopDocs().ScoreDocs;
            var baseDocId = td.ScoreDocs[0].Doc;

            // Ids already yielded, so the same document is never returned twice.
            var ids = new HashSet<string>(StringComparer.OrdinalIgnoreCase);

            // Without a document id the base was located via map-group fields; in that case the
            // retriever is asked to fetch the same fields the base document stores.
            var fieldsToFetch = string.IsNullOrWhiteSpace(query.DocumentId)
                ? _searcher.Doc(baseDocId).GetFields().Cast<AbstractField>().Select(x => x.Name).Distinct().ToArray()
                : null;

            var retriever = createRetriever(fieldsToFetch);

            foreach (var hit in hits)
            {
                // The caller may cancel between yielded documents.
                token.ThrowIfCancellationRequested();

                // Never return the base document itself.
                if (hit.Doc == baseDocId)
                {
                    continue;
                }

                var doc = _searcher.Doc(hit.Doc);
                var id  = doc.Get(Constants.Indexing.Fields.DocumentIdFieldName) ?? doc.Get(Constants.Indexing.Fields.ReduceKeyFieldName);
                if (id == null)
                {
                    continue;
                }

                if (ids.Add(id) == false)
                {
                    continue;
                }

                yield return retriever.Get(doc, hit.Score);
            }
        }