/// <summary>
/// Copies each tuning option that is set (non-null) on <paramref name="parameters"/> onto
/// <paramref name="mlt"/>. Options that are null are not applied.
/// </summary>
/// <param name="mlt">The MoreLikeThis instance to configure.</param>
/// <param name="parameters">The query parameters carrying the optional overrides.</param>
private static void AssignParameters(Similarity.Net.MoreLikeThis mlt, MoreLikeThisQueryParameters parameters)
{
    if (parameters.Boost != null)
    {
        mlt.SetBoost(parameters.Boost.Value);
    }

    // This option used to be applied twice in a row with the same value; once is enough.
    if (parameters.MaximumNumberOfTokensParsed != null)
    {
        mlt.SetMaxNumTokensParsed(parameters.MaximumNumberOfTokensParsed.Value);
    }

    if (parameters.MaximumQueryTerms != null)
    {
        mlt.SetMaxQueryTerms(parameters.MaximumQueryTerms.Value);
    }

    if (parameters.MaximumWordLength != null)
    {
        mlt.SetMaxWordLen(parameters.MaximumWordLength.Value);
    }

    if (parameters.MinimumDocumentFrequency != null)
    {
        mlt.SetMinDocFreq(parameters.MinimumDocumentFrequency.Value);
    }

    if (parameters.MinimumTermFrequency != null)
    {
        mlt.SetMinTermFreq(parameters.MinimumTermFrequency.Value);
    }

    if (parameters.MinimumWordLength != null)
    {
        mlt.SetMinWordLen(parameters.MinimumWordLength.Value);
    }
}
/// <summary>
/// Turns the Lucene hits of a more-like-this search into JSON documents, always skipping the hit
/// whose Lucene doc id equals <paramref name="baseDocId"/>.
/// When <c>parameters.DocumentId</c> is set, the distinct document ids stored on the hits are
/// loaded through <c>Database.Get</c> and ids that yield no document are dropped.
/// Otherwise each hit is projected from its stored index fields and stamped with the index etag.
/// </summary>
/// <param name="parameters">The more-like-this query parameters.</param>
/// <param name="searcher">The searcher used to read the stored Lucene documents.</param>
/// <param name="index">The index name, used to look up the index etag.</param>
/// <param name="hits">The hits returned by the more-like-this query.</param>
/// <param name="baseDocId">The Lucene doc id of the document the search started from.</param>
/// <returns>An array of the resulting documents, in hit order.</returns>
private IEnumerable<JsonDocument> GetJsonDocuments(MoreLikeThisQueryParameters parameters, IndexSearcher searcher, string index, IEnumerable<ScoreDoc> hits, int baseDocId)
{
    var collected = new List<JsonDocument>();

    if (string.IsNullOrEmpty(parameters.DocumentId))
    {
        // No document id: build projections from the fields stored for the base document.
        var baseDocument = searcher.Doc(baseDocId);
        var fieldNames = baseDocument.GetFields()
            .Cast<AbstractField>()
            .Select(field => field.Name())
            .Distinct()
            .ToArray();
        var indexEtag = Database.GetIndexEtag(index, null);

        foreach (var hit in hits)
        {
            if (hit.doc == baseDocId)
            {
                continue;
            }

            collected.Add(new JsonDocument
            {
                DataAsJson = Index.CreateDocumentFromFields(searcher.Doc(hit.doc), fieldNames),
                Etag = indexEtag
            });
        }

        return collected.ToArray();
    }

    // Document id supplied: load each distinct stored document id once, in hit order.
    var seenIds = new HashSet<string>();
    foreach (var hit in hits)
    {
        if (hit.doc == baseDocId)
        {
            continue;
        }

        var storedId = searcher.Doc(hit.doc).Get(Constants.DocumentIdFieldName);
        if (storedId == null || seenIds.Add(storedId) == false)
        {
            continue;
        }

        var loaded = Database.Get(storedId, null);
        if (loaded != null)
        {
            collected.Add(loaded);
        }
    }

    return collected.ToArray();
}
// Checks that parameters targeting a document id are encoded into the expected request URI,
// and that decoding that URI yields parameters which serialize identically to the originals.
public void Can_encode_decode_request_by_documentId()
{
    var parameters = new MoreLikeThisQueryParameters
    {
        IndexName = "dataIndex",
        DocumentId = "foo/1",
        Fields = new[] { "Body" },
        MinimumWordLength = 3,
        MinimumDocumentFrequency = 1,
        Boost = true
    };

    var uri = parameters.GetRequestUri(parameters.IndexName);

    Assert.Equal("/morelikethis/?index=dataIndex&docid=foo%2F1&fields=Body&boost=true&minDocFreq=1&minWordLen=3&", uri);

    // The full URI (not just its path part) is what gets decoded.
    var decodedParameters = MoreLikeThisQueryParameters.GetParametersFromPath(uri);

    Assert.Equal("dataIndex", decodedParameters.IndexName);
    Assert.Equal(JsonConvert.SerializeObject(parameters), JsonConvert.SerializeObject(decodedParameters));
}
/// <summary>
/// Handles a more-like-this request. Parses the parameters from the request URL and query string,
/// answers "not found" when the named index has no instance, answers "bad request" when neither a
/// document id nor any map group field was supplied, and otherwise performs the search.
/// </summary>
/// <param name="context">The HTTP context of the current request.</param>
public override void Respond(IHttpContext context)
{
    var query = MoreLikeThisQueryParameters.GetParametersFromPath(context.GetRequestUrl(), context.Request.QueryString);

    var indexInstance = Database.IndexStorage.GetIndexInstance(query.IndexName);
    if (indexInstance == null)
    {
        context.SetStatusToNotFound();
        var notFound = new { Error = "The index " + query.IndexName + " cannot be found" };
        context.WriteJson(notFound);
        return;
    }

    // MapGroupFields is only inspected when no document id was given (short-circuit).
    var hasDocumentId = string.IsNullOrEmpty(query.DocumentId) == false;
    if (!(hasDocumentId || query.MapGroupFields.Count != 0))
    {
        context.SetStatusToBadRequest();
        var badRequest = new { Error = "The document id or map group fields are mandatory" };
        context.WriteJson(badRequest);
        return;
    }

    PerformSearch(context, query.IndexName, indexInstance, query);
}
/// <summary>
/// Handles a more-like-this request whose index name and document id are taken from the first and
/// second capture groups of the URL match, with the tuning options read from the query string.
/// Answers "not found" when the index definition does not exist, "bad request" when the document
/// id is empty, and otherwise performs the search.
/// </summary>
/// <param name="context">The HTTP context of the current request.</param>
public override void Respond(IHttpContext context)
{
    var urlGroups = urlMatcher.Match(context.GetRequestUrl()).Groups;
    var indexName = urlGroups[1].Value;
    var queryString = context.Request.QueryString;

    // Parameters are parsed before the index lookup, so a parsing failure surfaces first.
    var parameters = new MoreLikeThisQueryParameters
    {
        DocumentId = urlGroups[2].Value,
        Fields = queryString.GetValues("fields"),
        Boost = queryString.Get("boost").ToNullableBool(),
        MaximumNumberOfTokensParsed = queryString.Get("maxNumTokens").ToNullableInt(),
        MaximumQueryTerms = queryString.Get("maxQueryTerms").ToNullableInt(),
        MaximumWordLength = queryString.Get("maxWordLen").ToNullableInt(),
        MinimumDocumentFrequency = queryString.Get("minDocFreq").ToNullableInt(),
        MinimumTermFrequency = queryString.Get("minTermFreq").ToNullableInt(),
        MinimumWordLength = queryString.Get("minWordLen").ToNullableInt(),
        StopWordsDocumentId = queryString.Get("stopWords"),
    };

    var definition = Database.IndexDefinitionStorage.GetIndexDefinition(indexName);
    if (definition == null)
    {
        context.SetStatusToNotFound();
        var notFound = new { Error = "The index " + indexName + " cannot be found" };
        context.WriteJson(notFound);
        return;
    }

    var documentIdMissing = string.IsNullOrEmpty(parameters.DocumentId);
    if (documentIdMissing)
    {
        context.SetStatusToBadRequest();
        var badRequest = new { Error = "The document id is mandatory" };
        context.WriteJson(badRequest);
        return;
    }

    PerformSearch(context, indexName, definition, parameters);
}
/// <summary>
/// Runs a more-like-this search for the document identified by <c>parameters.DocumentId</c> and
/// writes the matching documents (plus any requested includes) to the response.
/// Responds with "not found" when the source document is not in the index or when the requested
/// stop words document does not exist, and with "not modified" when the computed ETag matches
/// the one sent by the client.
/// </summary>
/// <param name="context">The HTTP context of the current request.</param>
/// <param name="indexName">The name of the index to search.</param>
/// <param name="indexDefinition">The index definition used to obtain the per-field analyzers.</param>
/// <param name="parameters">The more-like-this query parameters.</param>
private void PerformSearch(IHttpContext context, string indexName, IndexDefinition indexDefinition, MoreLikeThisQueryParameters parameters)
{
    IndexSearcher searcher;
    using (Database.IndexStorage.GetCurrentIndexSearcher(indexName, out searcher))
    {
        // get the current Lucene docid for the given RavenDB doc ID
        var td = searcher.Search(new TermQuery(new Term(Constants.DocumentIdFieldName, parameters.DocumentId)), 1);
        if (td.ScoreDocs.Length == 0)
        {
            context.SetStatusToNotFound();
            context.WriteJson(new { Error = "Document " + parameters.DocumentId + " could not be found" });
            return;
        }

        var ir = searcher.GetIndexReader();
        var mlt = new RavenMoreLikeThis(ir);

        AssignParameters(mlt, parameters);

        if (!string.IsNullOrWhiteSpace(parameters.StopWordsDocumentId))
        {
            var stopWordsDoc = Database.Get(parameters.StopWordsDocumentId, null);
            if (stopWordsDoc == null)
            {
                context.SetStatusToNotFound();
                context.WriteJson(new { Error = "Stop words document " + parameters.StopWordsDocumentId + " could not be found" });
                return;
            }

            var stopWords = stopWordsDoc.DataAsJson.JsonDeserialization<StopWordsSetup>().StopWords;

            // Build the table by hand rather than through ToDictionary: ToDictionary throws an
            // ArgumentException when two entries lower-case to the same key (e.g. "The" and "the").
            // The first occurrence wins; later duplicates are ignored.
            var stopWordsTable = new Hashtable();
            foreach (var stopWord in stopWords)
            {
                var loweredStopWord = stopWord.ToLower();
                if (!stopWordsTable.ContainsKey(loweredStopWord))
                {
                    stopWordsTable.Add(loweredStopWord, stopWord);
                }
            }

            mlt.SetStopWords(stopWordsTable);
        }

        // Fall back to the field names read from the index reader when the caller named no fields.
        var fieldNames = parameters.Fields ?? GetFieldNames(ir);
        mlt.SetFieldNames(fieldNames);
        mlt.Analyzers = GetAnalyzers(indexDefinition, fieldNames);

        var mltQuery = mlt.Like(td.ScoreDocs[0].doc);
        var tsdc = TopScoreDocCollector.create(context.GetPageSize(Database.Configuration.MaxPageSize), true);
        searcher.Search(mltQuery, tsdc);
        var hits = tsdc.TopDocs().ScoreDocs;

        var documentIds = hits
            .Select(hit => searcher.Doc(hit.doc).Get(Constants.DocumentIdFieldName))
            .Distinct();

        // Exclude the source document itself and drop ids that no longer resolve to a document.
        var jsonDocuments = documentIds
            .Where(docId => string.Equals(docId, parameters.DocumentId, StringComparison.InvariantCultureIgnoreCase) == false)
            .Select(docId => Database.Get(docId, null))
            .Where(it => it != null)
            .ToArray();

        var result = new MultiLoadResult();

        // The response ETag is derived from the etags of every returned document, the index etag,
        // and the etags of every included document.
        var includedEtags = new List<byte>(jsonDocuments.SelectMany(x => x.Etag.Value.ToByteArray()));
        includedEtags.AddRange(Database.GetIndexEtag(indexName, null).ToByteArray());
        var loadedIds = new HashSet<string>(jsonDocuments.Select(x => x.Key));
        var addIncludesCommand = new AddIncludesCommand(Database, GetRequestTransaction(context), (etag, includedDoc) =>
        {
            includedEtags.AddRange(etag.ToByteArray());
            result.Includes.Add(includedDoc);
        }, context.Request.QueryString.GetValues("include") ?? new string[0], loadedIds);

        foreach (var jsonDocument in jsonDocuments)
        {
            result.Results.Add(jsonDocument.ToJson());
            addIncludesCommand.Execute(jsonDocument.DataAsJson);
        }

        Guid computedEtag;
        using (var md5 = MD5.Create())
        {
            // An MD5 hash is 16 bytes, which is the length the Guid(byte[]) constructor requires.
            var computeHash = md5.ComputeHash(includedEtags.ToArray());
            computedEtag = new Guid(computeHash);
        }

        if (context.MatchEtag(computedEtag))
        {
            context.SetStatusToNotModified();
            return;
        }

        context.Response.AddHeader("ETag", computedEtag.ToString());
        context.WriteJson(result);
    }
}
/// <summary>
/// Runs a more-like-this search for the indexed entry selected by <c>parameters.DocumentId</c>
/// and/or <c>parameters.MapGroupFields</c>, and writes the matching documents (plus any requested
/// includes) to the response.
/// Responds with "not found" when no indexed entry matches or when the requested stop words
/// document does not exist, and with "not modified" when the computed ETag matches the one sent
/// by the client.
/// </summary>
/// <param name="context">The HTTP context of the current request.</param>
/// <param name="indexName">The name of the index to search.</param>
/// <param name="index">The index instance used to create the per-field analyzer.</param>
/// <param name="parameters">The more-like-this query parameters.</param>
private void PerformSearch(IHttpContext context, string indexName, Index index, MoreLikeThisQueryParameters parameters)
{
    IndexSearcher searcher;
    using (Database.IndexStorage.GetCurrentIndexSearcher(indexName, out searcher))
    {
        // Every supplied criterion (document id and each map group field) must match.
        var documentQuery = new BooleanQuery();

        if (!string.IsNullOrEmpty(parameters.DocumentId))
        {
            documentQuery.Add(new TermQuery(new Term(Constants.DocumentIdFieldName, parameters.DocumentId.ToLowerInvariant())), BooleanClause.Occur.MUST);
        }

        foreach (string key in parameters.MapGroupFields.Keys)
        {
            documentQuery.Add(new TermQuery(new Term(key, parameters.MapGroupFields[key])), BooleanClause.Occur.MUST);
        }

        // get the current Lucene docid for the given RavenDB doc ID
        var td = searcher.Search(documentQuery, 1);
        if (td.ScoreDocs.Length == 0)
        {
            context.SetStatusToNotFound();
            context.WriteJson(new { Error = "Document " + parameters.DocumentId + " could not be found" });
            return;
        }

        var ir = searcher.GetIndexReader();
        var mlt = new RavenMoreLikeThis(ir);

        AssignParameters(mlt, parameters);

        if (!string.IsNullOrWhiteSpace(parameters.StopWordsDocumentId))
        {
            var stopWordsDoc = Database.Get(parameters.StopWordsDocumentId, null);
            if (stopWordsDoc == null)
            {
                context.SetStatusToNotFound();
                context.WriteJson(new { Error = "Stop words document " + parameters.StopWordsDocumentId + " could not be found" });
                return;
            }

            var stopWords = stopWordsDoc.DataAsJson.JsonDeserialization<StopWordsSetup>().StopWords;

            // Build the table by hand rather than through ToDictionary: ToDictionary throws an
            // ArgumentException when two entries lower-case to the same key (e.g. "The" and "the").
            // The first occurrence wins; later duplicates are ignored.
            var stopWordsTable = new Hashtable();
            foreach (var stopWord in stopWords)
            {
                var loweredStopWord = stopWord.ToLower();
                if (!stopWordsTable.ContainsKey(loweredStopWord))
                {
                    stopWordsTable.Add(loweredStopWord, stopWord);
                }
            }

            mlt.SetStopWords(stopWordsTable);
        }

        // Fall back to the field names read from the index reader when the caller named no fields.
        var fieldNames = parameters.Fields ?? GetFieldNames(ir);
        mlt.SetFieldNames(fieldNames);

        var toDispose = new List<Action>();
        PerFieldAnalyzerWrapper perFieldAnalyzerWrapper = null;
        try
        {
            perFieldAnalyzerWrapper = index.CreateAnalyzer(new LowerCaseKeywordAnalyzer(), toDispose, true);
            mlt.SetAnalyzer(perFieldAnalyzerWrapper);

            var mltQuery = mlt.Like(td.ScoreDocs[0].doc);
            var tsdc = TopScoreDocCollector.create(context.GetPageSize(Database.Configuration.MaxPageSize), true);
            searcher.Search(mltQuery, tsdc);
            var hits = tsdc.TopDocs().ScoreDocs;
            var jsonDocuments = GetJsonDocuments(parameters, searcher, indexName, hits, td.ScoreDocs[0].doc);

            var result = new MultiLoadResult();

            // The response ETag is derived from the etags of every returned document, the index
            // etag, and the etags of every included document.
            var includedEtags = new List<byte>(jsonDocuments.SelectMany(x => x.Etag.Value.ToByteArray()));
            includedEtags.AddRange(Database.GetIndexEtag(indexName, null).ToByteArray());
            var loadedIds = new HashSet<string>(jsonDocuments.Select(x => x.Key));
            var addIncludesCommand = new AddIncludesCommand(Database, GetRequestTransaction(context), (etag, includedDoc) =>
            {
                includedEtags.AddRange(etag.ToByteArray());
                result.Includes.Add(includedDoc);
            }, context.Request.QueryString.GetValues("include") ?? new string[0], loadedIds);

            foreach (var jsonDocument in jsonDocuments)
            {
                result.Results.Add(jsonDocument.ToJson());
                addIncludesCommand.Execute(jsonDocument.DataAsJson);
            }

            Guid computedEtag;
            using (var md5 = MD5.Create())
            {
                // An MD5 hash is 16 bytes, which is the length the Guid(byte[]) constructor requires.
                var computeHash = md5.ComputeHash(includedEtags.ToArray());
                computedEtag = new Guid(computeHash);
            }

            if (context.MatchEtag(computedEtag))
            {
                context.SetStatusToNotModified();
                return;
            }

            context.Response.AddHeader("ETag", computedEtag.ToString());
            context.WriteJson(result);
        }
        finally
        {
            // Close the analyzer first, then run the cleanup actions registered by CreateAnalyzer.
            if (perFieldAnalyzerWrapper != null)
            {
                perFieldAnalyzerWrapper.Close();
            }

            foreach (var action in toDispose)
            {
                action();
            }
        }
    }
}