Ejemplo n.º 1
0
        public MoreLikeThisQueryResult ExecuteMoreLikeThisQuery(MoreLikeThisQuery query, TransactionInformation transactionInformation, int pageSize = 25)
        {
            if (query == null)
            {
                throw new ArgumentNullException("query");
            }

            var index = database.IndexStorage.GetIndexInstance(query.IndexName);

            if (index == null)
            {
                throw new InvalidOperationException("The index " + query.IndexName + " cannot be found");
            }

            if (string.IsNullOrEmpty(query.DocumentId) && query.MapGroupFields.Count == 0)
            {
                throw new InvalidOperationException("The document id or map group fields are mandatory");
            }

            IndexSearcher searcher;

            using (database.IndexStorage.GetCurrentIndexSearcher(index.indexId, out searcher))
            {
                var documentQuery = new BooleanQuery();

                if (string.IsNullOrEmpty(query.DocumentId) == false)
                {
                    documentQuery.Add(new TermQuery(new Term(Constants.DocumentIdFieldName, query.DocumentId.ToLowerInvariant())), Occur.MUST);
                }

                foreach (string key in query.MapGroupFields.Keys)
                {
                    documentQuery.Add(new TermQuery(new Term(key, query.MapGroupFields[key])), Occur.MUST);
                }

                var td = searcher.Search(documentQuery, 1);

                // get the current Lucene docid for the given RavenDB doc ID
                if (td.ScoreDocs.Length == 0)
                {
                    throw new InvalidOperationException("Document " + query.DocumentId + " could not be found");
                }

                var ir  = searcher.IndexReader;
                var mlt = new RavenMoreLikeThis(ir);

                AssignParameters(mlt, query);

                if (string.IsNullOrWhiteSpace(query.StopWordsDocumentId) == false)
                {
                    var stopWordsDoc = database.Documents.Get(query.StopWordsDocumentId, null);
                    if (stopWordsDoc == null)
                    {
                        throw new InvalidOperationException("Stop words document " + query.StopWordsDocumentId + " could not be found");
                    }

                    var stopWordsSetup = stopWordsDoc.DataAsJson.JsonDeserialization <StopWordsSetup>();
                    if (stopWordsSetup.StopWords != null)
                    {
                        var stopWords = stopWordsSetup.StopWords;
                        var ht        = new HashSet <string>(StringComparer.InvariantCultureIgnoreCase);
                        foreach (var stopWord in stopWords)
                        {
                            ht.Add(stopWord);
                        }
                        mlt.SetStopWords(ht);
                    }
                }

                var fieldNames = query.Fields ?? GetFieldNames(ir);
                mlt.SetFieldNames(fieldNames);

                var toDispose = new List <Action>();
                RavenPerFieldAnalyzerWrapper perFieldAnalyzerWrapper = null;
                try
                {
                    perFieldAnalyzerWrapper = index.CreateAnalyzer(new LowerCaseKeywordAnalyzer(), toDispose, true);
                    mlt.Analyzer            = perFieldAnalyzerWrapper;

                    var mltQuery = mlt.Like(td.ScoreDocs[0].Doc);
                    var tsdc     = TopScoreDocCollector.Create(pageSize, true);


                    if (string.IsNullOrWhiteSpace(query.AdditionalQuery) == false)
                    {
                        var additionalQuery = QueryBuilder.BuildQuery(query.AdditionalQuery, perFieldAnalyzerWrapper);
                        mltQuery = new BooleanQuery
                        {
                            { mltQuery, Occur.MUST },
                            { additionalQuery, Occur.MUST },
                        };
                    }

                    searcher.Search(mltQuery, tsdc);
                    var hits          = tsdc.TopDocs().ScoreDocs;
                    var jsonDocuments = GetJsonDocuments(query, searcher, index, query.IndexName, hits, td.ScoreDocs[0].Doc);

                    var result = new MultiLoadResult();

                    var includedEtags = new List <byte>(jsonDocuments.SelectMany(x => x.Etag.ToByteArray()));
                    includedEtags.AddRange(database.Indexes.GetIndexEtag(query.IndexName, null).ToByteArray());
                    var loadedIds          = new HashSet <string>(jsonDocuments.Select(x => x.Key));
                    var addIncludesCommand = new AddIncludesCommand(database, transactionInformation, (etag, includedDoc) =>
                    {
                        includedEtags.AddRange(etag.ToByteArray());
                        result.Includes.Add(includedDoc);
                    }, query.Includes ?? new string[0], loadedIds);

                    idsToLoad = new HashSet <string>();

                    database.TransactionalStorage.Batch(actions =>
                    {
                        documentRetriever = new DocumentRetriever(database.Configuration, actions, database.ReadTriggers, query.TransformerParameters, idsToLoad);

                        using (new CurrentTransformationScope(database, documentRetriever))
                        {
                            foreach (var document in ProcessResults(query, jsonDocuments, database.WorkContext.CancellationToken))
                            {
                                result.Results.Add(document);
                                addIncludesCommand.Execute(document);
                            }
                        }
                    });

                    addIncludesCommand.AlsoInclude(idsToLoad);

                    var  computeHash  = Encryptor.Current.Hash.Compute16(includedEtags.ToArray());
                    Etag computedEtag = Etag.Parse(computeHash);

                    return(new MoreLikeThisQueryResult
                    {
                        Etag = computedEtag,
                        Result = result,
                    });
                }
                finally
                {
                    if (perFieldAnalyzerWrapper != null)
                    {
                        perFieldAnalyzerWrapper.Close();
                    }
                    foreach (var action in toDispose)
                    {
                        action();
                    }
                }
            }
        }
Ejemplo n.º 2
0
        public MoreLikeThisQueryResult ExecuteMoreLikeThisQuery(MoreLikeThisQuery query, TransactionInformation transactionInformation, int pageSize = 25, string[] include = null)
        {
            if (query == null)
            {
                throw new ArgumentNullException("query");
            }

            var index = database.IndexStorage.GetIndexInstance(query.IndexName);

            if (index == null)
            {
                throw new InvalidOperationException("The index " + query.IndexName + " cannot be found");
            }

            if (string.IsNullOrEmpty(query.DocumentId) && query.MapGroupFields.Count == 0)
            {
                throw new InvalidOperationException("The document id or map group fields are mandatory");
            }

            IndexSearcher searcher;

            using (database.IndexStorage.GetCurrentIndexSearcher(query.IndexName, out searcher))
            {
                var documentQuery = new BooleanQuery();

                if (string.IsNullOrEmpty(query.DocumentId) == false)
                {
                    documentQuery.Add(new TermQuery(new Term(Constants.DocumentIdFieldName, query.DocumentId.ToLowerInvariant())), Occur.MUST);
                }

                foreach (string key in query.MapGroupFields.Keys)
                {
                    documentQuery.Add(new TermQuery(new Term(key, query.MapGroupFields[key])), Occur.MUST);
                }

                var td = searcher.Search(documentQuery, 1);

                // get the current Lucene docid for the given RavenDB doc ID
                if (td.ScoreDocs.Length == 0)
                {
                    throw new InvalidOperationException("Document " + query.DocumentId + " could not be found");
                }

                var ir  = searcher.IndexReader;
                var mlt = new RavenMoreLikeThis(ir);

                AssignParameters(mlt, query);

                if (string.IsNullOrWhiteSpace(query.StopWordsDocumentId) == false)
                {
                    var stopWordsDoc = database.Get(query.StopWordsDocumentId, null);
                    if (stopWordsDoc == null)
                    {
                        throw new InvalidOperationException("Stop words document " + query.StopWordsDocumentId + " could not be found");
                    }

                    var stopWordsSetup = stopWordsDoc.DataAsJson.JsonDeserialization <StopWordsSetup>();
                    if (stopWordsSetup.StopWords != null)
                    {
                        var stopWords = stopWordsSetup.StopWords;
                        var ht        = new Hashtable(StringComparer.InvariantCultureIgnoreCase);
                        foreach (var stopWord in stopWords)
                        {
                            ht[stopWord] = stopWord;
                        }
                        mlt.SetStopWords(ht);
                    }
                }

                var fieldNames = query.Fields ?? GetFieldNames(ir);
                mlt.SetFieldNames(fieldNames);

                var toDispose = new List <Action>();
                RavenPerFieldAnalyzerWrapper perFieldAnalyzerWrapper = null;
                try
                {
                    perFieldAnalyzerWrapper = index.CreateAnalyzer(new LowerCaseKeywordAnalyzer(), toDispose, true);
                    mlt.SetAnalyzer(perFieldAnalyzerWrapper);

                    var mltQuery = mlt.Like(td.ScoreDocs[0].Doc);
                    var tsdc     = TopScoreDocCollector.Create(pageSize, true);
                    searcher.Search(mltQuery, tsdc);
                    var hits          = tsdc.TopDocs().ScoreDocs;
                    var jsonDocuments = GetJsonDocuments(query, searcher, index, query.IndexName, hits, td.ScoreDocs[0].Doc);

                    var result = new MultiLoadResult();

                    var includedEtags = new List <byte>(jsonDocuments.SelectMany(x => x.Etag.Value.ToByteArray()));
                    includedEtags.AddRange(database.GetIndexEtag(query.IndexName, null).ToByteArray());
                    var loadedIds          = new HashSet <string>(jsonDocuments.Select(x => x.Key));
                    var addIncludesCommand = new AddIncludesCommand(database, transactionInformation, (etag, includedDoc) =>
                    {
                        includedEtags.AddRange(etag.ToByteArray());
                        result.Includes.Add(includedDoc);
                    }, include ?? new string[0], loadedIds);

                    foreach (var jsonDocument in jsonDocuments)
                    {
                        result.Results.Add(jsonDocument.ToJson());
                        addIncludesCommand.Execute(jsonDocument.DataAsJson);
                    }

                    Guid computedEtag;
                    using (var md5 = MD5.Create())
                    {
                        var computeHash = md5.ComputeHash(includedEtags.ToArray());
                        computedEtag = new Guid(computeHash);
                    }

                    return(new MoreLikeThisQueryResult
                    {
                        Etag = computedEtag,
                        Result = result,
                    });
                }
                finally
                {
                    if (perFieldAnalyzerWrapper != null)
                    {
                        perFieldAnalyzerWrapper.Close();
                    }
                    foreach (var action in toDispose)
                    {
                        action();
                    }
                }
            }
        }