/// <summary>
/// Executes a "more like this" query: locates the index entry identified by
/// <c>query.DocumentId</c> and/or <c>query.MapGroupFields</c>, builds a Lucene similarity
/// query from it, and returns the similar documents together with their includes.
/// </summary>
/// <param name="query">The query definition (index name, base document, fields, stop words, includes, ...).</param>
/// <param name="transactionInformation">Passed through to the includes command.</param>
/// <param name="pageSize">Number of top-scoring hits to collect from the index.</param>
/// <returns>The results, the included documents and an etag computed from every contributing etag.</returns>
/// <exception cref="ArgumentNullException"><paramref name="query"/> is null.</exception>
/// <exception cref="InvalidOperationException">
/// The index, the base document or the stop words document cannot be found, or neither a
/// document id nor map group fields were supplied.
/// </exception>
public MoreLikeThisQueryResult ExecuteMoreLikeThisQuery(MoreLikeThisQuery query, TransactionInformation transactionInformation, int pageSize = 25)
{
    if (query == null)
    {
        throw new ArgumentNullException(nameof(query));
    }

    var index = database.IndexStorage.GetIndexInstance(query.IndexName);
    if (index == null)
    {
        throw new InvalidOperationException("The index " + query.IndexName + " cannot be found");
    }

    if (string.IsNullOrEmpty(query.DocumentId) && query.MapGroupFields.Count == 0)
    {
        throw new InvalidOperationException("The document id or map group fields are mandatory");
    }

    IndexSearcher searcher;
    using (database.IndexStorage.GetCurrentIndexSearcher(index.indexId, out searcher))
    {
        // Every supplied criterion must match (Occur.MUST) to identify the base index entry.
        var documentQuery = new BooleanQuery();

        if (string.IsNullOrEmpty(query.DocumentId) == false)
        {
            documentQuery.Add(new TermQuery(new Term(Constants.DocumentIdFieldName, query.DocumentId.ToLowerInvariant())), Occur.MUST);
        }

        foreach (string key in query.MapGroupFields.Keys)
        {
            documentQuery.Add(new TermQuery(new Term(key, query.MapGroupFields[key])), Occur.MUST);
        }

        // get the current Lucene docid for the given RavenDB doc ID
        var td = searcher.Search(documentQuery, 1);
        if (td.ScoreDocs.Length == 0)
        {
            throw new InvalidOperationException("Document " + query.DocumentId + " could not be found");
        }

        var ir = searcher.IndexReader;
        var mlt = new RavenMoreLikeThis(ir);

        AssignParameters(mlt, query);

        if (string.IsNullOrWhiteSpace(query.StopWordsDocumentId) == false)
        {
            var stopWordsDoc = database.Documents.Get(query.StopWordsDocumentId, null);
            if (stopWordsDoc == null)
            {
                throw new InvalidOperationException("Stop words document " + query.StopWordsDocumentId + " could not be found");
            }

            var stopWordsSetup = stopWordsDoc.DataAsJson.JsonDeserialization<StopWordsSetup>();
            if (stopWordsSetup.StopWords != null)
            {
                var stopWords = stopWordsSetup.StopWords;
                var ht = new HashSet<string>(StringComparer.InvariantCultureIgnoreCase);
                foreach (var stopWord in stopWords)
                {
                    ht.Add(stopWord);
                }

                mlt.SetStopWords(ht);
            }
        }

        // An empty field list is treated like a missing one: fall back to the fields of the index
        // instead of building a similarity query over no fields at all.
        var fieldNames = (query.Fields != null && query.Fields.Length > 0)
            ? query.Fields
            : GetFieldNames(ir);
        mlt.SetFieldNames(fieldNames);

        var toDispose = new List<Action>();
        RavenPerFieldAnalyzerWrapper perFieldAnalyzerWrapper = null;
        try
        {
            perFieldAnalyzerWrapper = index.CreateAnalyzer(new LowerCaseKeywordAnalyzer(), toDispose, true);
            mlt.Analyzer = perFieldAnalyzerWrapper;

            var mltQuery = mlt.Like(td.ScoreDocs[0].Doc);
            var tsdc = TopScoreDocCollector.Create(pageSize, true);

            if (string.IsNullOrWhiteSpace(query.AdditionalQuery) == false)
            {
                // Restrict the similar documents to those that also match the additional query.
                var additionalQuery = QueryBuilder.BuildQuery(query.AdditionalQuery, perFieldAnalyzerWrapper);
                mltQuery = new BooleanQuery
                {
                    { mltQuery, Occur.MUST },
                    { additionalQuery, Occur.MUST },
                };
            }

            searcher.Search(mltQuery, tsdc);
            var hits = tsdc.TopDocs().ScoreDocs;
            var jsonDocuments = GetJsonDocuments(query, searcher, index, query.IndexName, hits, td.ScoreDocs[0].Doc);

            var result = new MultiLoadResult();

            // The result etag is derived from the etags of all results, the index and all includes.
            var includedEtags = new List<byte>(jsonDocuments.SelectMany(x => x.Etag.ToByteArray()));
            includedEtags.AddRange(database.Indexes.GetIndexEtag(query.IndexName, null).ToByteArray());

            var loadedIds = new HashSet<string>(jsonDocuments.Select(x => x.Key));
            var addIncludesCommand = new AddIncludesCommand(database, transactionInformation, (etag, includedDoc) =>
            {
                includedEtags.AddRange(etag.ToByteArray());
                result.Includes.Add(includedDoc);
            }, query.Includes ?? new string[0], loadedIds);

            // NOTE(review): idsToLoad and documentRetriever are not declared in this method,
            // so they are presumably members of the enclosing type - confirm that concurrent
            // calls cannot interfere with each other through them.
            idsToLoad = new HashSet<string>();

            database.TransactionalStorage.Batch(actions =>
            {
                documentRetriever = new DocumentRetriever(database.Configuration, actions, database.ReadTriggers, query.TransformerParameters, idsToLoad);

                using (new CurrentTransformationScope(database, documentRetriever))
                {
                    foreach (var document in ProcessResults(query, jsonDocuments, database.WorkContext.CancellationToken))
                    {
                        result.Results.Add(document);
                        addIncludesCommand.Execute(document);
                    }
                }
            });

            addIncludesCommand.AlsoInclude(idsToLoad);

            var computeHash = Encryptor.Current.Hash.Compute16(includedEtags.ToArray());
            Etag computedEtag = Etag.Parse(computeHash);

            return new MoreLikeThisQueryResult
            {
                Etag = computedEtag,
                Result = result,
            };
        }
        finally
        {
            try
            {
                if (perFieldAnalyzerWrapper != null)
                {
                    perFieldAnalyzerWrapper.Close();
                }
            }
            finally
            {
                // Run the cleanup callbacks even when closing the analyzer wrapper throws.
                foreach (var action in toDispose)
                {
                    action();
                }
            }
        }
    }
}
/// <summary>
/// Lazily yields the documents that are similar to a base document. The base is either an
/// index entry found through <c>query.DocumentId</c> / <c>query.MapGroupFields</c>, or, when
/// neither is supplied, the JSON in <c>query.Document</c>.
/// </summary>
/// <param name="query">The more-like-this query definition.</param>
/// <param name="stopWords">Optional stop words handed to the similarity query builder; may be null.</param>
/// <param name="createRetriever">Factory for the retriever that turns index entries into documents.</param>
/// <param name="context">Context used to parse <c>query.Document</c> and to build the additional filter query.</param>
/// <param name="getSpatialField">Passed through to the query builder for the additional filter query.</param>
/// <param name="token">Observed before the similarity search and before each hit is processed.</param>
/// <returns>The similar documents, without the base entry and without duplicate ids.</returns>
/// <exception cref="InvalidOperationException">No index entry matches the document id / map group fields.</exception>
/// <exception cref="OperationCanceledException"><paramref name="token"/> was canceled during enumeration.</exception>
public IEnumerable<Document> MoreLikeThis(
    MoreLikeThisQueryServerSide query,
    HashSet<string> stopWords,
    Func<SelectField[], IQueryResultRetriever> createRetriever,
    JsonOperationContext context,
    Func<string, SpatialField> getSpatialField,
    CancellationToken token)
{
    int? baseDocId = null;

    if (string.IsNullOrWhiteSpace(query.DocumentId) == false || query.MapGroupFields.Count > 0)
    {
        // Every supplied criterion must match (Occur.MUST) to identify the base index entry.
        var documentQuery = new BooleanQuery();

        if (string.IsNullOrWhiteSpace(query.DocumentId) == false)
        {
            documentQuery.Add(new TermQuery(new Term(Constants.Documents.Indexing.Fields.DocumentIdFieldName, query.DocumentId.ToLowerInvariant())), Occur.MUST);
        }

        foreach (var key in query.MapGroupFields.Keys)
        {
            documentQuery.Add(new TermQuery(new Term(key, query.MapGroupFields[key])), Occur.MUST);
        }

        // get the current Lucene docid for the given RavenDB doc ID
        var td = _searcher.Search(documentQuery, 1, _state);
        if (td.ScoreDocs.Length == 0)
        {
            // Note: the message names only DocumentId, which is blank when the lookup
            // was driven by MapGroupFields alone.
            throw new InvalidOperationException("Document " + query.DocumentId + " could not be found");
        }

        baseDocId = td.ScoreDocs[0].Doc;
    }

    var ir = _searcher.IndexReader;
    var mlt = new RavenMoreLikeThis(ir, query, _state);

    AssignParameters(mlt, query);

    if (stopWords != null)
    {
        mlt.SetStopWords(stopWords);
    }

    string[] fieldNames;
    if (query.Fields != null && query.Fields.Length > 0)
    {
        fieldNames = query.Fields;
    }
    else
    {
        // No explicit fields: use every indexed field except the id / reduce key hash fields.
        fieldNames = ir.GetFieldNames(IndexReader.FieldOption.INDEXED)
            .Where(x => x != Constants.Documents.Indexing.Fields.DocumentIdFieldName && x != Constants.Documents.Indexing.Fields.ReduceKeyHashFieldName)
            .ToArray();
    }

    mlt.SetFieldNames(fieldNames);
    mlt.Analyzer = _analyzer;

    var pageSize = GetPageSize(_searcher, query.PageSize);

    Query mltQuery;
    if (baseDocId.HasValue)
    {
        mltQuery = mlt.Like(baseDocId.Value);
    }
    else
    {
        using (var blittableJson = ParseJsonStringIntoBlittable(query.Document, context))
        {
            mltQuery = mlt.Like(blittableJson);
        }
    }

    var tsdc = TopScoreDocCollector.Create(pageSize, true);

    if (query.Metadata.WhereFields.Count > 0)
    {
        // Restrict the similar documents to those that also match the WHERE clause.
        var additionalQuery = QueryBuilder.BuildQuery(context, query.Metadata, query.Metadata.Query.Where, null, _analyzer, getSpatialField);

        mltQuery = new BooleanQuery
        {
            { mltQuery, Occur.MUST },
            { additionalQuery, Occur.MUST }
        };
    }

    // Honor cancellation before running the main search.
    token.ThrowIfCancellationRequested();

    _searcher.Search(mltQuery, tsdc, _state);
    var hits = tsdc.TopDocs().ScoreDocs;

    var ids = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
    var retriever = createRetriever(null);

    foreach (var hit in hits)
    {
        // The caller may stop consuming at any time; do not keep loading documents once canceled.
        token.ThrowIfCancellationRequested();

        // Never report the base entry as similar to itself.
        if (hit.Doc == baseDocId)
        {
            continue;
        }

        var doc = _searcher.Doc(hit.Doc, _state);
        var id = doc.Get(Constants.Documents.Indexing.Fields.DocumentIdFieldName, _state)
                 ?? doc.Get(Constants.Documents.Indexing.Fields.ReduceKeyHashFieldName, _state);

        if (id == null)
        {
            continue;
        }

        // Skip entries whose id was already returned.
        if (ids.Add(id) == false)
        {
            continue;
        }

        yield return retriever.Get(doc, hit.Score, _state);
    }
}
/// <summary>
/// Executes a "more like this" query: locates the index entry identified by
/// <c>query.DocumentId</c> and/or <c>query.MapGroupFields</c>, builds a Lucene similarity
/// query from it, and returns the similar documents together with the requested includes.
/// </summary>
/// <param name="query">The query definition (index name, base document, fields, stop words, ...).</param>
/// <param name="transactionInformation">Passed through to the includes command.</param>
/// <param name="pageSize">Number of top-scoring hits to collect from the index.</param>
/// <param name="include">Include paths to resolve for every result; null means no includes.</param>
/// <returns>The results, the included documents and an etag computed from every contributing etag.</returns>
/// <exception cref="ArgumentNullException"><paramref name="query"/> is null.</exception>
/// <exception cref="InvalidOperationException">
/// The index, the base document or the stop words document cannot be found, or neither a
/// document id nor map group fields were supplied.
/// </exception>
public MoreLikeThisQueryResult ExecuteMoreLikeThisQuery(MoreLikeThisQuery query, TransactionInformation transactionInformation, int pageSize = 25, string[] include = null)
{
    if (query == null)
    {
        throw new ArgumentNullException(nameof(query));
    }

    var index = database.IndexStorage.GetIndexInstance(query.IndexName);
    if (index == null)
    {
        throw new InvalidOperationException("The index " + query.IndexName + " cannot be found");
    }

    if (string.IsNullOrEmpty(query.DocumentId) && query.MapGroupFields.Count == 0)
    {
        throw new InvalidOperationException("The document id or map group fields are mandatory");
    }

    IndexSearcher searcher;
    using (database.IndexStorage.GetCurrentIndexSearcher(query.IndexName, out searcher))
    {
        // Every supplied criterion must match (Occur.MUST) to identify the base index entry.
        var documentQuery = new BooleanQuery();

        if (string.IsNullOrEmpty(query.DocumentId) == false)
        {
            documentQuery.Add(new TermQuery(new Term(Constants.DocumentIdFieldName, query.DocumentId.ToLowerInvariant())), Occur.MUST);
        }

        foreach (string key in query.MapGroupFields.Keys)
        {
            documentQuery.Add(new TermQuery(new Term(key, query.MapGroupFields[key])), Occur.MUST);
        }

        // get the current Lucene docid for the given RavenDB doc ID
        var td = searcher.Search(documentQuery, 1);
        if (td.ScoreDocs.Length == 0)
        {
            throw new InvalidOperationException("Document " + query.DocumentId + " could not be found");
        }

        var ir = searcher.IndexReader;
        var mlt = new RavenMoreLikeThis(ir);

        AssignParameters(mlt, query);

        if (string.IsNullOrWhiteSpace(query.StopWordsDocumentId) == false)
        {
            var stopWordsDoc = database.Get(query.StopWordsDocumentId, null);
            if (stopWordsDoc == null)
            {
                throw new InvalidOperationException("Stop words document " + query.StopWordsDocumentId + " could not be found");
            }

            var stopWordsSetup = stopWordsDoc.DataAsJson.JsonDeserialization<StopWordsSetup>();
            if (stopWordsSetup.StopWords != null)
            {
                var stopWords = stopWordsSetup.StopWords;
                // Keyed by the stop word itself, compared case-insensitively.
                var ht = new Hashtable(StringComparer.InvariantCultureIgnoreCase);
                foreach (var stopWord in stopWords)
                {
                    ht[stopWord] = stopWord;
                }

                mlt.SetStopWords(ht);
            }
        }

        // An empty field list is treated like a missing one: fall back to the fields of the index
        // instead of building a similarity query over no fields at all.
        var fieldNames = (query.Fields != null && query.Fields.Length > 0)
            ? query.Fields
            : GetFieldNames(ir);
        mlt.SetFieldNames(fieldNames);

        var toDispose = new List<Action>();
        RavenPerFieldAnalyzerWrapper perFieldAnalyzerWrapper = null;
        try
        {
            perFieldAnalyzerWrapper = index.CreateAnalyzer(new LowerCaseKeywordAnalyzer(), toDispose, true);
            mlt.SetAnalyzer(perFieldAnalyzerWrapper);

            var mltQuery = mlt.Like(td.ScoreDocs[0].Doc);
            var tsdc = TopScoreDocCollector.Create(pageSize, true);
            searcher.Search(mltQuery, tsdc);
            var hits = tsdc.TopDocs().ScoreDocs;
            var jsonDocuments = GetJsonDocuments(query, searcher, index, query.IndexName, hits, td.ScoreDocs[0].Doc);

            var result = new MultiLoadResult();

            // The result etag is derived from the etags of all results, the index and all includes.
            var includedEtags = new List<byte>(jsonDocuments.SelectMany(x => x.Etag.Value.ToByteArray()));
            includedEtags.AddRange(database.GetIndexEtag(query.IndexName, null).ToByteArray());

            var loadedIds = new HashSet<string>(jsonDocuments.Select(x => x.Key));
            var addIncludesCommand = new AddIncludesCommand(database, transactionInformation, (etag, includedDoc) =>
            {
                includedEtags.AddRange(etag.ToByteArray());
                result.Includes.Add(includedDoc);
            }, include ?? new string[0], loadedIds);

            foreach (var jsonDocument in jsonDocuments)
            {
                result.Results.Add(jsonDocument.ToJson());
                addIncludesCommand.Execute(jsonDocument.DataAsJson);
            }

            // MD5 is used only to fold the etag bytes into a 16-byte Guid, not for security.
            Guid computedEtag;
            using (var md5 = MD5.Create())
            {
                var computeHash = md5.ComputeHash(includedEtags.ToArray());
                computedEtag = new Guid(computeHash);
            }

            return new MoreLikeThisQueryResult
            {
                Etag = computedEtag,
                Result = result,
            };
        }
        finally
        {
            try
            {
                if (perFieldAnalyzerWrapper != null)
                {
                    perFieldAnalyzerWrapper.Close();
                }
            }
            finally
            {
                // Run the cleanup callbacks even when closing the analyzer wrapper throws.
                foreach (var action in toDispose)
                {
                    action();
                }
            }
        }
    }
}
/// <summary>
/// Lazily yields the documents that are similar to a base document. The more-like-this
/// definition is built from the WHERE clause of <paramref name="query"/>; the base is either
/// the first index entry matching its base-document query or an explicitly supplied JSON document.
/// </summary>
/// <param name="query">The index query whose metadata carries the more-like-this expression.</param>
/// <param name="retriever">Turns matching index entries into documents.</param>
/// <param name="context">Documents context used to load the stop words document and parse the base document.</param>
/// <param name="token">Observed before the similarity search and before each hit is processed.</param>
/// <returns>The similar documents, without the base entry and without duplicate ids.</returns>
/// <exception cref="InvalidOperationException">
/// The stop words document cannot be found, or the base-document query matches nothing.
/// </exception>
/// <exception cref="OperationCanceledException"><paramref name="token"/> was canceled during enumeration.</exception>
public IEnumerable<Document> MoreLikeThis(
    IndexQueryServerSide query,
    IQueryResultRetriever retriever,
    DocumentsOperationContext context,
    CancellationToken token)
{
    IDisposable releaseServerContext = null;
    IDisposable closeServerTransaction = null;
    TransactionOperationContext serverContext = null;
    MoreLikeThisQuery moreLikeThisQuery;

    try
    {
        // A server-wide context and read transaction are opened only when the query uses compare-exchange.
        if (query.Metadata.HasCmpXchg)
        {
            releaseServerContext = context.DocumentDatabase.ServerStore.ContextPool.AllocateOperationContext(out serverContext);
            closeServerTransaction = serverContext.OpenReadTransaction();
        }

        // closeServerTransaction may be null here; a using statement over null is a no-op.
        using (closeServerTransaction)
        {
            moreLikeThisQuery = QueryBuilder.BuildMoreLikeThisQuery(serverContext, context, query.Metadata, query.Metadata.Query.Where, query.QueryParameters, _analyzer, _queryBuilderFactories);
        }
    }
    finally
    {
        releaseServerContext?.Dispose();
    }

    var options = moreLikeThisQuery.Options != null
        ? JsonDeserializationServer.MoreLikeThisOptions(moreLikeThisQuery.Options)
        : MoreLikeThisOptions.Default;

    HashSet<string> stopWords = null;
    if (string.IsNullOrWhiteSpace(options.StopWordsDocumentId) == false)
    {
        var stopWordsDoc = context.DocumentDatabase.DocumentsStorage.Get(context, options.StopWordsDocumentId);
        if (stopWordsDoc == null)
        {
            throw new InvalidOperationException($"Stop words document {options.StopWordsDocumentId} could not be found");
        }

        if (stopWordsDoc.Data.TryGet(nameof(MoreLikeThisStopWords.StopWords), out BlittableJsonReaderArray value) && value != null)
        {
            stopWords = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
            for (var i = 0; i < value.Length; i++)
            {
                stopWords.Add(value.GetStringByIndex(i));
            }
        }
    }

    var ir = _searcher.IndexReader;
    var mlt = new RavenMoreLikeThis(ir, options, _state);

    int? baseDocId = null;

    if (moreLikeThisQuery.BaseDocument == null)
    {
        // get the current Lucene docid for the given RavenDB doc ID
        var td = _searcher.Search(moreLikeThisQuery.BaseDocumentQuery, 1, _state);
        if (td.ScoreDocs.Length == 0)
        {
            throw new InvalidOperationException("Given filtering expression did not yield any documents that could be used as a base of comparison");
        }

        baseDocId = td.ScoreDocs[0].Doc;
    }

    if (stopWords != null)
    {
        mlt.SetStopWords(stopWords);
    }

    string[] fieldNames;
    if (options.Fields != null && options.Fields.Length > 0)
    {
        fieldNames = options.Fields;
    }
    else
    {
        // No explicit fields: use every indexed field except the id / reduce key hash fields.
        fieldNames = ir.GetFieldNames(IndexReader.FieldOption.INDEXED)
            .Where(x => x != Constants.Documents.Indexing.Fields.DocumentIdFieldName && x != Constants.Documents.Indexing.Fields.ReduceKeyHashFieldName)
            .ToArray();
    }

    mlt.SetFieldNames(fieldNames);
    mlt.Analyzer = _analyzer;

    var pageSize = GetPageSize(_searcher, query.PageSize);

    Query mltQuery;
    if (baseDocId.HasValue)
    {
        mltQuery = mlt.Like(baseDocId.Value);
    }
    else
    {
        using (var blittableJson = ParseJsonStringIntoBlittable(moreLikeThisQuery.BaseDocument, context))
        {
            mltQuery = mlt.Like(blittableJson);
        }
    }

    var tsdc = TopScoreDocCollector.Create(pageSize, true);

    // A match-all filter adds nothing, so only a real filter is combined with the similarity query.
    if (moreLikeThisQuery.FilterQuery != null && moreLikeThisQuery.FilterQuery is MatchAllDocsQuery == false)
    {
        mltQuery = new BooleanQuery
        {
            { mltQuery, Occur.MUST },
            { moreLikeThisQuery.FilterQuery, Occur.MUST }
        };
    }

    // Honor cancellation before running the main search.
    token.ThrowIfCancellationRequested();

    _searcher.Search(mltQuery, tsdc, _state);
    var hits = tsdc.TopDocs().ScoreDocs;

    var ids = new HashSet<string>(StringComparer.OrdinalIgnoreCase);

    foreach (var hit in hits)
    {
        // The caller may stop consuming at any time; do not keep loading documents once canceled.
        token.ThrowIfCancellationRequested();

        // Never report the base entry as similar to itself.
        if (hit.Doc == baseDocId)
        {
            continue;
        }

        var doc = _searcher.Doc(hit.Doc, _state);
        var id = doc.Get(Constants.Documents.Indexing.Fields.DocumentIdFieldName, _state)
                 ?? doc.Get(Constants.Documents.Indexing.Fields.ReduceKeyHashFieldName, _state);

        if (id == null)
        {
            continue;
        }

        // Skip entries whose id was already returned.
        if (ids.Add(id) == false)
        {
            continue;
        }

        yield return retriever.Get(doc, hit.Score, _state);
    }
}
/// <summary>
/// Lazily yields the documents that are similar to the index entry identified by
/// <c>query.DocumentId</c> and/or <c>query.MapGroupFields</c>.
/// </summary>
/// <param name="query">The more-like-this query definition.</param>
/// <param name="stopWords">Optional stop words handed to the similarity query builder; may be null.</param>
/// <param name="createRetriever">
/// Factory for the retriever that turns index entries into documents. It receives the field names
/// of the base entry when no document id was supplied, otherwise null.
/// </param>
/// <param name="token">Observed before the similarity search and before each hit is processed.</param>
/// <returns>The similar documents, without the base entry and without duplicate ids.</returns>
/// <exception cref="InvalidOperationException">No index entry matches the document id / map group fields.</exception>
/// <exception cref="OperationCanceledException"><paramref name="token"/> was canceled during enumeration.</exception>
public IEnumerable<Document> MoreLikeThis(MoreLikeThisQueryServerSide query, HashSet<string> stopWords, Func<string[], IQueryResultRetriever> createRetriever, CancellationToken token)
{
    // Every supplied criterion must match (Occur.MUST) to identify the base index entry.
    var documentQuery = new BooleanQuery();

    if (string.IsNullOrWhiteSpace(query.DocumentId) == false)
    {
        documentQuery.Add(new TermQuery(new Term(Constants.Indexing.Fields.DocumentIdFieldName, query.DocumentId.ToLowerInvariant())), Occur.MUST);
    }

    foreach (var key in query.MapGroupFields.Keys)
    {
        documentQuery.Add(new TermQuery(new Term(key, query.MapGroupFields[key])), Occur.MUST);
    }

    // get the current Lucene docid for the given RavenDB doc ID
    var td = _searcher.Search(documentQuery, 1);
    if (td.ScoreDocs.Length == 0)
    {
        throw new InvalidOperationException("Document " + query.DocumentId + " could not be found");
    }

    var ir = _searcher.IndexReader;
    var mlt = new RavenMoreLikeThis(ir, query);

    if (stopWords != null)
    {
        mlt.SetStopWords(stopWords);
    }

    // An empty field list is treated like a missing one: fall back to every indexed field
    // except the id / reduce key fields instead of querying over no fields at all.
    var fieldNames = (query.Fields != null && query.Fields.Length > 0)
        ? query.Fields
        : ir.GetFieldNames(IndexReader.FieldOption.INDEXED)
            .Where(x => x != Constants.Indexing.Fields.DocumentIdFieldName && x != Constants.Indexing.Fields.ReduceKeyFieldName)
            .ToArray();

    mlt.SetFieldNames(fieldNames);
    mlt.Analyzer = _analyzer;

    var mltQuery = mlt.Like(td.ScoreDocs[0].Doc);
    var tsdc = TopScoreDocCollector.Create(query.PageSize, true);

    if (string.IsNullOrWhiteSpace(query.AdditionalQuery) == false)
    {
        // Restrict the similar documents to those that also match the additional query.
        var additionalQuery = QueryBuilder.BuildQuery(query.AdditionalQuery, _analyzer);
        mltQuery = new BooleanQuery
        {
            { mltQuery, Occur.MUST },
            { additionalQuery, Occur.MUST },
        };
    }

    // Honor cancellation before running the main search.
    token.ThrowIfCancellationRequested();

    _searcher.Search(mltQuery, tsdc);
    var hits = tsdc.TopDocs().ScoreDocs;
    var baseDocId = td.ScoreDocs[0].Doc;

    var ids = new HashSet<string>(StringComparer.OrdinalIgnoreCase);

    // Without a document id, the retriever is told to fetch the fields present on the base entry.
    var fieldsToFetch = string.IsNullOrWhiteSpace(query.DocumentId)
        ? _searcher.Doc(baseDocId).GetFields().Cast<AbstractField>().Select(x => x.Name).Distinct().ToArray()
        : null;

    var retriever = createRetriever(fieldsToFetch);

    foreach (var hit in hits)
    {
        // The caller may stop consuming at any time; do not keep loading documents once canceled.
        token.ThrowIfCancellationRequested();

        // Never report the base entry as similar to itself.
        if (hit.Doc == baseDocId)
        {
            continue;
        }

        var doc = _searcher.Doc(hit.Doc);
        var id = doc.Get(Constants.Indexing.Fields.DocumentIdFieldName)
                 ?? doc.Get(Constants.Indexing.Fields.ReduceKeyFieldName);

        if (id == null)
        {
            continue;
        }

        // Skip entries whose id was already returned.
        if (ids.Add(id) == false)
        {
            continue;
        }

        yield return retriever.Get(doc, hit.Score);
    }
}