Exemplo n.º 1
0
		public MoreLikeThisQueryResult ExecuteMoreLikeThisQuery(MoreLikeThisQuery query, TransactionInformation transactionInformation, int pageSize = 25, string[] include = null)
		{
			if (query == null) throw new ArgumentNullException("query");

			var index = database.IndexStorage.GetIndexInstance(query.IndexName);
			if (index == null)
				throw new InvalidOperationException("The index " + query.IndexName + " cannot be found");

			if (string.IsNullOrEmpty(query.DocumentId) && query.MapGroupFields.Count == 0)
				throw new InvalidOperationException("The document id or map group fields are mandatory");

			IndexSearcher searcher;
			using (database.IndexStorage.GetCurrentIndexSearcher(index.indexId, out searcher))
			{
				var documentQuery = new BooleanQuery();

				if (string.IsNullOrEmpty(query.DocumentId) == false)
				{
					documentQuery.Add(new TermQuery(new Term(Constants.DocumentIdFieldName, query.DocumentId.ToLowerInvariant())), Occur.MUST);
				}

				foreach (string key in query.MapGroupFields.Keys)
				{
					documentQuery.Add(new TermQuery(new Term(key, query.MapGroupFields[key])), Occur.MUST);
				}

				var td = searcher.Search(documentQuery, 1);

				// get the current Lucene docid for the given RavenDB doc ID
				if (td.ScoreDocs.Length == 0)
					throw new InvalidOperationException("Document " + query.DocumentId + " could not be found");

				var ir = searcher.IndexReader;
				var mlt = new RavenMoreLikeThis(ir);

				AssignParameters(mlt, query);

				if (string.IsNullOrWhiteSpace(query.StopWordsDocumentId) == false)
				{
					var stopWordsDoc = database.Documents.Get(query.StopWordsDocumentId, null);
					if (stopWordsDoc == null)
						throw new InvalidOperationException("Stop words document " + query.StopWordsDocumentId + " could not be found");

					var stopWordsSetup = stopWordsDoc.DataAsJson.JsonDeserialization<StopWordsSetup>();
					if (stopWordsSetup.StopWords != null)
					{
						var stopWords = stopWordsSetup.StopWords;
						var ht = new HashSet<string>(StringComparer.InvariantCultureIgnoreCase);
						foreach (var stopWord in stopWords)
						{
							ht.Add(stopWord);
						}
						mlt.SetStopWords(ht);
					}
				}

				var fieldNames = query.Fields ?? GetFieldNames(ir);
				mlt.SetFieldNames(fieldNames);

				var toDispose = new List<Action>();
				RavenPerFieldAnalyzerWrapper perFieldAnalyzerWrapper = null;
				try
				{
					perFieldAnalyzerWrapper = index.CreateAnalyzer(new LowerCaseKeywordAnalyzer(), toDispose, true);
					mlt.Analyzer = perFieldAnalyzerWrapper;

					var mltQuery = mlt.Like(td.ScoreDocs[0].Doc);
					var tsdc = TopScoreDocCollector.Create(pageSize, true);
					searcher.Search(mltQuery, tsdc);
					var hits = tsdc.TopDocs().ScoreDocs;
					var jsonDocuments = GetJsonDocuments(query, searcher, index, query.IndexName, hits, td.ScoreDocs[0].Doc);

					var result = new MultiLoadResult();

					var includedEtags = new List<byte>(jsonDocuments.SelectMany(x => x.Etag.ToByteArray()));
					includedEtags.AddRange(database.Indexes.GetIndexEtag(query.IndexName, null).ToByteArray());
					var loadedIds = new HashSet<string>(jsonDocuments.Select(x => x.Key));
					var addIncludesCommand = new AddIncludesCommand(database, transactionInformation, (etag, includedDoc) =>
					{
						includedEtags.AddRange(etag.ToByteArray());
						result.Includes.Add(includedDoc);
					}, include ?? new string[0], loadedIds);

					foreach (var jsonDocument in jsonDocuments)
					{
						result.Results.Add(jsonDocument.ToJson());
						addIncludesCommand.Execute(jsonDocument.DataAsJson);
					}

					Etag computedEtag;
					using (var md5 = MD5.Create())
					{
						var computeHash = md5.ComputeHash(includedEtags.ToArray());
						computedEtag = Etag.Parse(computeHash);
					}

					return new MoreLikeThisQueryResult
					{
						Etag = computedEtag,
						Result = result,
					};
				}
				finally
				{
					if (perFieldAnalyzerWrapper != null)
						perFieldAnalyzerWrapper.Close();
					foreach (var action in toDispose)
					{
						action();
					}
				}
			}
		}
Exemplo n.º 2
0
		private void PerformSearch(IHttpContext context, string indexName, Index index, MoreLikeThisQueryParameters parameters)
		{
			IndexSearcher searcher;
			using (Database.IndexStorage.GetCurrentIndexSearcher(indexName, out searcher))
			{
				var documentQuery = new BooleanQuery();

				if (!string.IsNullOrEmpty(parameters.DocumentId))
				{
					documentQuery.Add(new TermQuery(new Term(Constants.DocumentIdFieldName, parameters.DocumentId.ToLowerInvariant())),
									  Occur.MUST);
				}

				foreach (string key in parameters.MapGroupFields.Keys)
				{
					documentQuery.Add(new TermQuery(new Term(key, parameters.MapGroupFields[key])),
									  Occur.MUST);
				}

				var td = searcher.Search(documentQuery, 1);

				// get the current Lucene docid for the given RavenDB doc ID
				if (td.ScoreDocs.Length == 0)
				{
					context.SetStatusToNotFound();
					context.WriteJson(new { Error = "Document " + parameters.DocumentId + " could not be found" });
					return;
				}

				var ir = searcher.IndexReader;
				var mlt = new RavenMoreLikeThis(ir);

				AssignParameters(mlt, parameters);

				if (!string.IsNullOrWhiteSpace(parameters.StopWordsDocumentId))
				{
					var stopWordsDoc = Database.Get(parameters.StopWordsDocumentId, null);
					if (stopWordsDoc == null)
					{
						context.SetStatusToNotFound();
						context.WriteJson(
							new
							{
								Error = "Stop words document " + parameters.StopWordsDocumentId + " could not be found"
							});
						return;
					}
					var stopWordsSetup = stopWordsDoc.DataAsJson.JsonDeserialization<StopWordsSetup>();
					if(stopWordsSetup.StopWords != null)
					{
						var stopWords = stopWordsSetup.StopWords;
						var ht = new Hashtable(StringComparer.InvariantCultureIgnoreCase);
						foreach (var stopWord in stopWords)
						{
							ht[stopWord] = stopWord;
						}
						mlt.SetStopWords(ht);	
					}
				}

				var fieldNames = parameters.Fields ?? GetFieldNames(ir);
				mlt.SetFieldNames(fieldNames);

				var toDispose = new List<Action>();
				PerFieldAnalyzerWrapper perFieldAnalyzerWrapper = null;
				try
				{
					perFieldAnalyzerWrapper = index.CreateAnalyzer(new LowerCaseKeywordAnalyzer(), toDispose, true);
					mlt.SetAnalyzer(perFieldAnalyzerWrapper);

					var mltQuery = mlt.Like(td.ScoreDocs[0].Doc);
					var tsdc = TopScoreDocCollector.Create(context.GetPageSize(Database.Configuration.MaxPageSize), true);
					searcher.Search(mltQuery, tsdc);
					var hits = tsdc.TopDocs().ScoreDocs;
					var jsonDocuments = GetJsonDocuments(parameters, searcher, index, indexName, hits, td.ScoreDocs[0].Doc);

					var result = new MultiLoadResult();

					var includedEtags = new List<byte>(jsonDocuments.SelectMany(x => x.Etag.Value.ToByteArray()));
					includedEtags.AddRange(Database.GetIndexEtag(indexName, null).ToByteArray());
					var loadedIds = new HashSet<string>(jsonDocuments.Select(x => x.Key));
					var addIncludesCommand = new AddIncludesCommand(Database, GetRequestTransaction(context), (etag, includedDoc) =>
					{
						includedEtags.AddRange(etag.ToByteArray());
						result.Includes.Add(includedDoc);
					}, context.Request.QueryString.GetValues("include") ?? new string[0], loadedIds);

					foreach (var jsonDocumet in jsonDocuments)
					{
						result.Results.Add(jsonDocumet.ToJson());
						addIncludesCommand.Execute(jsonDocumet.DataAsJson);
					}

					Guid computedEtag;
					using (var md5 = MD5.Create())
					{
						var computeHash = md5.ComputeHash(includedEtags.ToArray());
						computedEtag = new Guid(computeHash);
					}

					if (context.MatchEtag(computedEtag))
					{
						context.SetStatusToNotModified();
						return;
					}

					context.Response.AddHeader("ETag", computedEtag.ToString());
					context.WriteJson(result);
				}
				finally
				{
					if (perFieldAnalyzerWrapper != null)
						perFieldAnalyzerWrapper.Close();
					foreach (var action in toDispose)
					{
						action();
					}
				}
			}
		}