/// <summary>
/// Builds a <see cref="MoreLikeThisQueryParameters"/> instance from query-string values.
/// </summary>
/// <param name="path">Request path. Not read by this method; kept for the existing signature.</param>
/// <param name="query">
/// Query-string values. Keys read: index, fields, boost, maxNumTokens, maxQueryTerms,
/// maxWordLen, minDocFreq, minTermFreq, minWordLen, stopWords and docid.
/// </param>
/// <returns>The populated parameters object.</returns>
/// <remarks>
/// The "docid" value is split on ';'. A segment containing '=' is added to
/// <c>MapGroupFields</c> (name before the first '=', value after it); a segment without '='
/// is assigned to <c>DocumentId</c>, so the last such segment wins.
/// A missing "docid" key is treated as an empty value instead of causing a
/// <see cref="NullReferenceException"/>.
/// </remarks>
public static MoreLikeThisQueryParameters GetParametersFromPath(string path, NameValueCollection query)
{
    var results = new MoreLikeThisQueryParameters
    {
        IndexName = query.Get("index"),
        Fields = query.GetValues("fields"),
        Boost = query.Get("boost").ToNullableBool(),
        MaximumNumberOfTokensParsed = query.Get("maxNumTokens").ToNullableInt(),
        MaximumQueryTerms = query.Get("maxQueryTerms").ToNullableInt(),
        MaximumWordLength = query.Get("maxWordLen").ToNullableInt(),
        MinimumDocumentFrequency = query.Get("minDocFreq").ToNullableInt(),
        MinimumTermFrequency = query.Get("minTermFreq").ToNullableInt(),
        MinimumWordLength = query.Get("minWordLen").ToNullableInt(),
        StopWordsDocumentId = query.Get("stopWords"),
    };

    // NameValueCollection.Get returns null when the key is absent; guard before splitting.
    var docId = query.Get("docid") ?? string.Empty;
    var keyValues = docId.Split(new[] { ';' }, StringSplitOptions.RemoveEmptyEntries);

    foreach (var keyValue in keyValues)
    {
        var split = keyValue.IndexOf('=');
        if (split >= 0)
        {
            // "name=value" segment: a map/group field constraint.
            results.MapGroupFields.Add(keyValue.Substring(0, split), keyValue.Substring(split + 1));
        }
        else
        {
            // Bare segment: the document id itself.
            results.DocumentId = keyValue;
        }
    }

    return results;
}
/// <summary>
/// Round-trips a document-id based request: encodes the parameters into a request URI,
/// checks the exact URI text, decodes it again and compares the JSON serialization of
/// the original and decoded parameter objects.
/// </summary>
public void Can_encode_decode_request_by_documentId()
{
    var parameters = new MoreLikeThisQueryParameters
    {
        IndexName = "dataIndex",
        DocumentId = "foo/1",
        Fields = new[] { "Body" },
        MinimumWordLength = 3,
        MinimumDocumentFrequency = 1,
        Boost = true
    };

    // Encode.
    var uri = parameters.GetRequestUri(parameters.IndexName);
    Assert.Equal("/morelikethis/?index=dataIndex&docid=foo%2F1&fields=Body&boost=true&minDocFreq=1&minWordLen=3&", uri);

    // Split into path and query string; the query part keeps its leading '?'.
    var queryStart = uri.IndexOf('?');
    var path = uri.Substring(0, queryStart);
    var rawQuery = uri.Substring(queryStart);
    var queryString = HttpUtility.ParseQueryString(rawQuery);

    // Decode and compare.
    var decodedParameters = MoreLikeThisQueryParameters.GetParametersFromPath(path, queryString);

    Assert.Equal("dataIndex", decodedParameters.IndexName);

    var expectedJson = JsonConvert.SerializeObject(parameters);
    var actualJson = JsonConvert.SerializeObject(decodedParameters);
    Assert.Equal(expectedJson, actualJson);
}
/// <summary>
/// Creates a <see cref="MoreLikeThisQueryParameters"/> from the values of a parsed query string.
/// </summary>
/// <param name="path">Request path. This method does not read it; it is retained for signature compatibility.</param>
/// <param name="query">
/// Parsed query string. The keys consulted are: index, fields, boost, maxNumTokens,
/// maxQueryTerms, maxWordLen, minDocFreq, minTermFreq, minWordLen, stopWords and docid.
/// </param>
/// <returns>The parameters object filled from <paramref name="query"/>.</returns>
/// <remarks>
/// "docid" is a ';'-separated list. Entries of the form name=value (split at the first '=')
/// are added to <c>MapGroupFields</c>; any entry without '=' is stored in <c>DocumentId</c>
/// (a later entry overwrites an earlier one). When "docid" is absent the list is treated
/// as empty rather than throwing a <see cref="NullReferenceException"/>.
/// </remarks>
public static MoreLikeThisQueryParameters GetParametersFromPath(string path, NameValueCollection query)
{
    var results = new MoreLikeThisQueryParameters
    {
        IndexName = query.Get("index"),
        Fields = query.GetValues("fields"),
        Boost = query.Get("boost").ToNullableBool(),
        MaximumNumberOfTokensParsed = query.Get("maxNumTokens").ToNullableInt(),
        MaximumQueryTerms = query.Get("maxQueryTerms").ToNullableInt(),
        MaximumWordLength = query.Get("maxWordLen").ToNullableInt(),
        MinimumDocumentFrequency = query.Get("minDocFreq").ToNullableInt(),
        MinimumTermFrequency = query.Get("minTermFreq").ToNullableInt(),
        MinimumWordLength = query.Get("minWordLen").ToNullableInt(),
        StopWordsDocumentId = query.Get("stopWords"),
    };

    // Get() yields null for a missing key, which would make Split() throw; fall back to "".
    var rawDocId = query.Get("docid") ?? string.Empty;

    foreach (var keyValue in rawDocId.Split(new[] { ';' }, StringSplitOptions.RemoveEmptyEntries))
    {
        var split = keyValue.IndexOf('=');
        if (split < 0)
        {
            // No '=': the entry is the document id.
            results.DocumentId = keyValue;
            continue;
        }

        // name=value: a map/group field constraint.
        results.MapGroupFields.Add(keyValue.Substring(0, split), keyValue.Substring(split + 1));
    }

    return results;
}
/// <summary>
/// Runs a "more like this" search against the given index and writes the outcome to the response.
/// </summary>
/// <param name="context">HTTP context used to read the page size / includes and to write the response.</param>
/// <param name="indexName">Name of the index whose searcher is used.</param>
/// <param name="index">Index instance used to create the analyzer and passed on to result loading.</param>
/// <param name="parameters">Query parameters: base document id / map-group fields, tuning values, stop words, fields.</param>
/// <remarks>
/// Responds with "not found" when the base document or the stop words document cannot be located,
/// and with "not modified" when the computed ETag matches the request.
/// </remarks>
private void PerformSearch(IHttpContext context, string indexName, Index index, MoreLikeThisQueryParameters parameters)
{
    IndexSearcher searcher;
    using (Database.IndexStorage.GetCurrentIndexSearcher(indexName, out searcher))
    {
        // Build the query that locates the base document inside the index.
        var baseDocumentQuery = new BooleanQuery();

        if (string.IsNullOrEmpty(parameters.DocumentId) == false)
        {
            var idTerm = new Term(Constants.DocumentIdFieldName, parameters.DocumentId.ToLowerInvariant());
            baseDocumentQuery.Add(new TermQuery(idTerm), Occur.MUST);
        }

        foreach (string fieldName in parameters.MapGroupFields.Keys)
        {
            var fieldTerm = new Term(fieldName, parameters.MapGroupFields[fieldName]);
            baseDocumentQuery.Add(new TermQuery(fieldTerm), Occur.MUST);
        }

        // Get the current Lucene doc id for the given RavenDB doc id.
        var baseDocumentHits = searcher.Search(baseDocumentQuery, 1);
        if (baseDocumentHits.ScoreDocs.Length == 0)
        {
            context.SetStatusToNotFound();
            context.WriteJson(new { Error = "Document " + parameters.DocumentId + " could not be found" });
            return;
        }

        var baseLuceneDocId = baseDocumentHits.ScoreDocs[0].Doc;

        var indexReader = searcher.IndexReader;
        var moreLikeThis = new RavenMoreLikeThis(indexReader);
        AssignParameters(moreLikeThis, parameters);

        // Optional stop words, loaded from a document in the database.
        if (string.IsNullOrWhiteSpace(parameters.StopWordsDocumentId) == false)
        {
            var stopWordsDocument = Database.Get(parameters.StopWordsDocumentId, null);
            if (stopWordsDocument == null)
            {
                context.SetStatusToNotFound();
                context.WriteJson(new
                {
                    Error = "Stop words document " + parameters.StopWordsDocumentId + " could not be found"
                });
                return;
            }

            var setup = stopWordsDocument.DataAsJson.JsonDeserialization<StopWordsSetup>();
            if (setup.StopWords != null)
            {
                var stopWordTable = new Hashtable(StringComparer.InvariantCultureIgnoreCase);
                foreach (var word in setup.StopWords)
                {
                    stopWordTable[word] = word;
                }
                moreLikeThis.SetStopWords(stopWordTable);
            }
        }

        // Use the requested fields, or fall back to the field names taken from the index reader.
        var fieldNames = parameters.Fields ?? GetFieldNames(indexReader);
        moreLikeThis.SetFieldNames(fieldNames);

        var toDispose = new List<Action>();
        PerFieldAnalyzerWrapper analyzer = null;
        try
        {
            analyzer = index.CreateAnalyzer(new LowerCaseKeywordAnalyzer(), toDispose, true);
            moreLikeThis.SetAnalyzer(analyzer);

            var similarityQuery = moreLikeThis.Like(baseLuceneDocId);
            var pageSize = context.GetPageSize(Database.Configuration.MaxPageSize);
            var collector = TopScoreDocCollector.Create(pageSize, true);
            searcher.Search(similarityQuery, collector);
            var hits = collector.TopDocs().ScoreDocs;

            var jsonDocuments = GetJsonDocuments(parameters, searcher, index, indexName, hits, baseLuceneDocId);

            var result = new MultiLoadResult();

            // ETag input: result etags first, then the index etag, then any included documents' etags.
            var etagBytes = jsonDocuments.SelectMany(x => x.Etag.Value.ToByteArray()).ToList();
            etagBytes.AddRange(Database.GetIndexEtag(indexName, null).ToByteArray());

            var loadedIds = new HashSet<string>(jsonDocuments.Select(x => x.Key));
            var includePaths = context.Request.QueryString.GetValues("include") ?? new string[0];
            var addIncludesCommand = new AddIncludesCommand(
                Database,
                GetRequestTransaction(context),
                (etag, includedDoc) =>
                {
                    etagBytes.AddRange(etag.ToByteArray());
                    result.Includes.Add(includedDoc);
                },
                includePaths,
                loadedIds);

            foreach (var document in jsonDocuments)
            {
                result.Results.Add(document.ToJson());
                addIncludesCommand.Execute(document.DataAsJson);
            }

            // The 16-byte MD5 hash of all collected etag bytes becomes the response ETag.
            Guid computedEtag;
            using (var md5 = MD5.Create())
            {
                computedEtag = new Guid(md5.ComputeHash(etagBytes.ToArray()));
            }

            if (context.MatchEtag(computedEtag))
            {
                context.SetStatusToNotModified();
                return;
            }

            context.Response.AddHeader("ETag", computedEtag.ToString());
            context.WriteJson(result);
        }
        finally
        {
            if (analyzer != null)
            {
                analyzer.Close();
            }

            foreach (var dispose in toDispose)
            {
                dispose();
            }
        }
    }
}
/// <summary>
/// Copies every tuning value that is set (non-null) on <paramref name="parameters"/>
/// to the corresponding setter of <paramref name="mlt"/>. Values left null are skipped,
/// so <paramref name="mlt"/> keeps whatever it already had for them.
/// </summary>
/// <param name="mlt">The MoreLikeThis instance to configure.</param>
/// <param name="parameters">The query parameters supplying the optional values.</param>
private static void AssignParameters(Similarity.Net.MoreLikeThis mlt, MoreLikeThisQueryParameters parameters)
{
    if (parameters.Boost != null)
        mlt.SetBoost(parameters.Boost.Value);

    // Applied once; the original repeated this check-and-set verbatim.
    if (parameters.MaximumNumberOfTokensParsed != null)
        mlt.SetMaxNumTokensParsed(parameters.MaximumNumberOfTokensParsed.Value);

    if (parameters.MaximumQueryTerms != null)
        mlt.SetMaxQueryTerms(parameters.MaximumQueryTerms.Value);

    if (parameters.MaximumWordLength != null)
        mlt.SetMaxWordLen(parameters.MaximumWordLength.Value);

    if (parameters.MinimumDocumentFrequency != null)
        mlt.SetMinDocFreq(parameters.MinimumDocumentFrequency.Value);

    if (parameters.MinimumTermFrequency != null)
        mlt.SetMinTermFreq(parameters.MinimumTermFrequency.Value);

    if (parameters.MinimumWordLength != null)
        mlt.SetMinWordLen(parameters.MinimumWordLength.Value);
}
/// <summary>
/// Turns Lucene hits into JSON documents, skipping the hit that is the base document itself.
/// </summary>
/// <param name="parameters">Query parameters; whether <c>DocumentId</c> is set selects the loading strategy.</param>
/// <param name="searcher">Searcher used to read the stored Lucene documents.</param>
/// <param name="index">Index whose <c>IsMapReduce</c> flag selects the key field name.</param>
/// <param name="indexName">Index name used to obtain the index etag.</param>
/// <param name="hits">The scored hits to convert.</param>
/// <param name="baseDocId">Lucene doc id of the base document; hits with this id are excluded.</param>
/// <returns>
/// When a document id was supplied: the documents loaded from the database by the distinct,
/// non-null document ids stored in the hits (ids that load as null are dropped).
/// Otherwise: documents built from the hits' index fields, each carrying the index etag.
/// </returns>
private IEnumerable<JsonDocument> GetJsonDocuments(
    MoreLikeThisQueryParameters parameters,
    IndexSearcher searcher,
    Index index,
    string indexName,
    IEnumerable<ScoreDoc> hits,
    int baseDocId)
{
    if (string.IsNullOrEmpty(parameters.DocumentId))
    {
        // No document id: project the hits straight from the index, using the
        // field names found on the base Lucene document.
        var baseFieldNames = searcher.Doc(baseDocId)
            .GetFields()
            .Cast<AbstractField>()
            .Select(field => field.Name)
            .Distinct()
            .ToArray();
        var indexEtag = Database.GetIndexEtag(indexName, null);

        var projected =
            from hit in hits
            where hit.Doc != baseDocId
            select new JsonDocument
            {
                DataAsJson = Index.CreateDocumentFromFields(
                    searcher.Doc(hit.Doc),
                    new FieldsToFetch(
                        baseFieldNames,
                        AggregationOperation.None,
                        index.IsMapReduce ? Constants.ReduceKeyFieldName : Constants.DocumentIdFieldName)),
                Etag = indexEtag
            };

        return projected.ToArray();
    }

    // Document id given: resolve each hit to its stored document id and load it from the database.
    var storedIds =
        from hit in hits
        where hit.Doc != baseDocId
        select searcher.Doc(hit.Doc).Get(Constants.DocumentIdFieldName);

    var distinctIds = storedIds.Where(id => id != null).Distinct();

    var loaded =
        from id in distinctIds
        select Database.Get(id, null);

    return loaded.Where(document => document != null).ToArray();
}