Esempio n. 1
0
 /// <summary>
 /// Copies every explicitly supplied (non-null) tuning option from <paramref name="parameters"/>
 /// onto <paramref name="mlt"/>. Options that are null are skipped, so the corresponding setter is never called.
 /// </summary>
 /// <param name="mlt">The MoreLikeThis instance to configure.</param>
 /// <param name="parameters">The request parameters; every option read here is optional.</param>
 private static void AssignParameters(Similarity.Net.MoreLikeThis mlt, MoreLikeThisQueryParameters parameters)
 {
     if (parameters.Boost != null)
     {
         mlt.SetBoost(parameters.Boost.Value);
     }
     // Applied exactly once (the same check and assignment used to be repeated back to back).
     if (parameters.MaximumNumberOfTokensParsed != null)
     {
         mlt.SetMaxNumTokensParsed(parameters.MaximumNumberOfTokensParsed.Value);
     }
     if (parameters.MaximumQueryTerms != null)
     {
         mlt.SetMaxQueryTerms(parameters.MaximumQueryTerms.Value);
     }
     if (parameters.MaximumWordLength != null)
     {
         mlt.SetMaxWordLen(parameters.MaximumWordLength.Value);
     }
     if (parameters.MinimumDocumentFrequency != null)
     {
         mlt.SetMinDocFreq(parameters.MinimumDocumentFrequency.Value);
     }
     if (parameters.MinimumTermFrequency != null)
     {
         mlt.SetMinTermFreq(parameters.MinimumTermFrequency.Value);
     }
     if (parameters.MinimumWordLength != null)
     {
         mlt.SetMinWordLen(parameters.MinimumWordLength.Value);
     }
 }
        /// <summary>
        /// Converts the Lucene hits of a more-like-this search into JSON documents, always leaving out
        /// the hit whose Lucene doc id equals <paramref name="baseDocId"/>.
        /// </summary>
        /// <param name="parameters">The request parameters; only <c>DocumentId</c> is read here.</param>
        /// <param name="searcher">Searcher used to load the stored Lucene documents for the hits.</param>
        /// <param name="index">Name of the index, used to look up the index etag.</param>
        /// <param name="hits">The scored hits returned by the search.</param>
        /// <param name="baseDocId">Lucene doc id of the document the search was based on.</param>
        /// <returns>
        /// When a document id was supplied: the distinct documents loaded from the database by their stored id
        /// (ids that are missing or cannot be loaded are dropped). Otherwise: one document per hit, built from
        /// the fields present on the base Lucene document and stamped with the index etag.
        /// </returns>
        private IEnumerable <JsonDocument> GetJsonDocuments(MoreLikeThisQueryParameters parameters, IndexSearcher searcher, string index, IEnumerable <ScoreDoc> hits, int baseDocId)
        {
            var documents = new List<JsonDocument>();

            if (string.IsNullOrEmpty(parameters.DocumentId))
            {
                // No document id to load by: rebuild each result from the Lucene fields themselves.
                var baseFieldNames = searcher.Doc(baseDocId)
                                     .GetFields()
                                     .Cast<AbstractField>()
                                     .Select(field => field.Name())
                                     .Distinct()
                                     .ToArray();
                var indexEtag = Database.GetIndexEtag(index, null);

                foreach (var hit in hits.Where(candidate => candidate.doc != baseDocId))
                {
                    documents.Add(new JsonDocument
                    {
                        DataAsJson = Index.CreateDocumentFromFields(searcher.Doc(hit.doc), baseFieldNames),
                        Etag = indexEtag
                    });
                }

                return documents.ToArray();
            }

            // Each stored document id is loaded at most once, in the order it first appears in the hits.
            var seenIds = new HashSet<string>();
            foreach (var hit in hits.Where(candidate => candidate.doc != baseDocId))
            {
                var documentId = searcher.Doc(hit.doc).Get(Constants.DocumentIdFieldName);
                if (documentId == null || !seenIds.Add(documentId))
                {
                    continue;
                }

                var document = Database.Get(documentId, null);
                if (document != null)
                {
                    documents.Add(document);
                }
            }

            return documents.ToArray();
        }
		/// <summary>
		/// Verifies that parameters addressed by document id are encoded into the expected request URI
		/// and that decoding that URI yields parameters that serialize identically to the originals.
		/// </summary>
		public void Can_encode_decode_request_by_documentId()
		{
			var parameters = new MoreLikeThisQueryParameters
			{
				IndexName = "dataIndex",
				DocumentId = "foo/1",
				Fields = new[] { "Body" },
				MinimumWordLength = 3,
				MinimumDocumentFrequency = 1,
				Boost = true
			};

			var uri = parameters.GetRequestUri(parameters.IndexName);

			Assert.Equal("/morelikethis/?index=dataIndex&docid=foo%2F1&fields=Body&boost=true&minDocFreq=1&minWordLen=3&", uri);

			var decodedParameters = MoreLikeThisQueryParameters.GetParametersFromPath(uri);

			Assert.Equal("dataIndex", decodedParameters.IndexName);
			// Comparing the serialized forms checks every property without relying on an Equals override.
			Assert.Equal(JsonConvert.SerializeObject(parameters), JsonConvert.SerializeObject(decodedParameters));
		}
        /// <summary>
        /// Handles a more-like-this request: parses the parameters from the request URL and query string,
        /// validates them, and runs the search.
        /// </summary>
        /// <remarks>
        /// Responds with "not found" when the named index has no instance, and with "bad request" when
        /// neither a document id nor any map group field was supplied.
        /// </remarks>
        /// <param name="context">The HTTP context of the current request.</param>
        public override void Respond(IHttpContext context)
        {
            var query         = MoreLikeThisQueryParameters.GetParametersFromPath(context.GetRequestUrl(), context.Request.QueryString);
            var indexInstance = Database.IndexStorage.GetIndexInstance(query.IndexName);

            if (indexInstance == null)
            {
                context.SetStatusToNotFound();
                context.WriteJson(new { Error = "The index " + query.IndexName + " cannot be found" });
            }
            else if (string.IsNullOrEmpty(query.DocumentId) && query.MapGroupFields.Count == 0)
            {
                // Nothing identifies the base document, so there is nothing to compare against.
                context.SetStatusToBadRequest();
                context.WriteJson(new { Error = "The document id or map group fields are mandatory" });
            }
            else
            {
                PerformSearch(context, query.IndexName, indexInstance, query);
            }
        }
Esempio n. 5
0
        /// <summary>
        /// Handles a more-like-this request: takes the index name and document id from the URL match,
        /// reads the optional tuning values from the query string, validates them, and runs the search.
        /// </summary>
        /// <remarks>
        /// Responds with "not found" when the index definition does not exist, and with "bad request"
        /// when no document id was supplied.
        /// </remarks>
        /// <param name="context">The HTTP context of the current request.</param>
        public override void Respond(IHttpContext context)
        {
            var urlMatch    = urlMatcher.Match(context.GetRequestUrl());
            var indexName   = urlMatch.Groups[1].Value;
            var queryString = context.Request.QueryString;

            var parameters = new MoreLikeThisQueryParameters();
            parameters.DocumentId                  = urlMatch.Groups[2].Value;
            parameters.Fields                      = queryString.GetValues("fields");
            parameters.Boost                       = queryString.Get("boost").ToNullableBool();
            parameters.MaximumNumberOfTokensParsed = queryString.Get("maxNumTokens").ToNullableInt();
            parameters.MaximumQueryTerms           = queryString.Get("maxQueryTerms").ToNullableInt();
            parameters.MaximumWordLength           = queryString.Get("maxWordLen").ToNullableInt();
            parameters.MinimumDocumentFrequency    = queryString.Get("minDocFreq").ToNullableInt();
            parameters.MinimumTermFrequency        = queryString.Get("minTermFreq").ToNullableInt();
            parameters.MinimumWordLength           = queryString.Get("minWordLen").ToNullableInt();
            parameters.StopWordsDocumentId         = queryString.Get("stopWords");

            var definition = Database.IndexDefinitionStorage.GetIndexDefinition(indexName);

            if (definition == null)
            {
                context.SetStatusToNotFound();
                context.WriteJson(new { Error = "The index " + indexName + " cannot be found" });
            }
            else if (string.IsNullOrEmpty(parameters.DocumentId))
            {
                context.SetStatusToBadRequest();
                context.WriteJson(new { Error = "The document id is mandatory" });
            }
            else
            {
                PerformSearch(context, indexName, definition, parameters);
            }
        }
Esempio n. 6
0
        /// <summary>
        /// Runs a more-like-this query for the document identified by <c>parameters.DocumentId</c> against
        /// the index <paramref name="indexName"/> and writes the similar documents, plus any requested
        /// includes, to the response as JSON.
        /// </summary>
        /// <remarks>
        /// Responds with "not found" when the base document is not in the index or the stop words document
        /// cannot be loaded, and with "not modified" when the computed ETag matches the request.
        /// </remarks>
        /// <param name="context">The HTTP context of the current request.</param>
        /// <param name="indexName">Name of the index to search.</param>
        /// <param name="indexDefinition">Definition of the index, used to resolve the analyzers per field.</param>
        /// <param name="parameters">The parsed more-like-this parameters.</param>
        private void PerformSearch(IHttpContext context, string indexName, IndexDefinition indexDefinition, MoreLikeThisQueryParameters parameters)
        {
            IndexSearcher searcher;

            using (Database.IndexStorage.GetCurrentIndexSearcher(indexName, out searcher))
            {
                var td = searcher.Search(new TermQuery(new Term(Constants.DocumentIdFieldName, parameters.DocumentId)), 1);
                // get the current Lucene docid for the given RavenDB doc ID
                if (td.ScoreDocs.Length == 0)
                {
                    context.SetStatusToNotFound();
                    context.WriteJson(new { Error = "Document " + parameters.DocumentId + " could not be found" });
                    return;
                }
                var ir  = searcher.GetIndexReader();
                var mlt = new RavenMoreLikeThis(ir);

                AssignParameters(mlt, parameters);

                if (!string.IsNullOrWhiteSpace(parameters.StopWordsDocumentId))
                {
                    var stopWordsDoc = Database.Get(parameters.StopWordsDocumentId, null);
                    if (stopWordsDoc == null)
                    {
                        context.SetStatusToNotFound();
                        context.WriteJson(
                            new
                        {
                            Error = "Stop words document " + parameters.StopWordsDocumentId + " could not be found"
                        });
                        return;
                    }
                    var stopWords = stopWordsDoc.DataAsJson.JsonDeserialization <StopWordsSetup>().StopWords;
                    // Group by the lower-cased word before building the dictionary: ToDictionary throws on
                    // duplicate keys, so a list containing e.g. both "The" and "the" would fail the request.
                    var stopWordsByLowerCase = stopWords
                                               .GroupBy(x => x.ToLower())
                                               .ToDictionary(g => g.Key, g => g.First());
                    mlt.SetStopWords(new Hashtable(stopWordsByLowerCase));
                }

                // Fall back to the field names reported for the index reader when the request names none.
                var fieldNames = parameters.Fields ?? GetFieldNames(ir);
                mlt.SetFieldNames(fieldNames);

                mlt.Analyzers = GetAnalyzers(indexDefinition, fieldNames);

                var mltQuery = mlt.Like(td.ScoreDocs[0].doc);
                var tsdc     = TopScoreDocCollector.create(context.GetPageSize(Database.Configuration.MaxPageSize), true);
                searcher.Search(mltQuery, tsdc);
                var hits        = tsdc.TopDocs().ScoreDocs;
                var documentIds = hits.Select(hit => searcher.Doc(hit.doc).Get(Constants.DocumentIdFieldName)).Distinct();

                // Drop the base document itself and any id that can no longer be loaded.
                var jsonDocuments =
                    documentIds
                    .Where(docId => string.Equals(docId, parameters.DocumentId, StringComparison.InvariantCultureIgnoreCase) == false)
                    .Select(docId => Database.Get(docId, null))
                    .Where(it => it != null)
                    .ToArray();

                var result = new MultiLoadResult();

                // The response ETag is derived from the etags of every returned document, the index etag,
                // and the etags of all included documents.
                var includedEtags = new List <byte>(jsonDocuments.SelectMany(x => x.Etag.Value.ToByteArray()));
                includedEtags.AddRange(Database.GetIndexEtag(indexName, null).ToByteArray());
                var loadedIds          = new HashSet <string>(jsonDocuments.Select(x => x.Key));
                var addIncludesCommand = new AddIncludesCommand(Database, GetRequestTransaction(context), (etag, includedDoc) =>
                {
                    includedEtags.AddRange(etag.ToByteArray());
                    result.Includes.Add(includedDoc);
                }, context.Request.QueryString.GetValues("include") ?? new string[0], loadedIds);

                foreach (var jsonDocument in jsonDocuments)
                {
                    result.Results.Add(jsonDocument.ToJson());
                    addIncludesCommand.Execute(jsonDocument.DataAsJson);
                }

                Guid computedEtag;
                using (var md5 = MD5.Create())
                {
                    // An MD5 digest is 16 bytes, which is exactly the size a Guid is built from.
                    var computeHash = md5.ComputeHash(includedEtags.ToArray());
                    computedEtag = new Guid(computeHash);
                }

                if (context.MatchEtag(computedEtag))
                {
                    context.SetStatusToNotModified();
                    return;
                }

                context.Response.AddHeader("ETag", computedEtag.ToString());
                context.WriteJson(result);
            }
        }
        /// <summary>
        /// Runs a more-like-this query against the index <paramref name="indexName"/> and writes the similar
        /// documents, plus any requested includes, to the response as JSON. The base document is located by
        /// <c>parameters.DocumentId</c> (lower-cased) and/or by every entry of <c>parameters.MapGroupFields</c>,
        /// all of which must match.
        /// </summary>
        /// <remarks>
        /// Responds with "not found" when no base document matches or the stop words document cannot be
        /// loaded, and with "not modified" when the computed ETag matches the request. The analyzer created
        /// for the search is closed, and its registered dispose actions are run, on every exit path.
        /// </remarks>
        /// <param name="context">The HTTP context of the current request.</param>
        /// <param name="indexName">Name of the index to search.</param>
        /// <param name="index">The index instance, used to create the per-field analyzer.</param>
        /// <param name="parameters">The parsed more-like-this parameters.</param>
        private void PerformSearch(IHttpContext context, string indexName, Index index, MoreLikeThisQueryParameters parameters)
        {
            IndexSearcher searcher;

            using (Database.IndexStorage.GetCurrentIndexSearcher(indexName, out searcher))
            {
                var documentQuery = new BooleanQuery();

                if (!string.IsNullOrEmpty(parameters.DocumentId))
                {
                    documentQuery.Add(new TermQuery(new Term(Constants.DocumentIdFieldName, parameters.DocumentId.ToLowerInvariant())),
                                      BooleanClause.Occur.MUST);
                }

                foreach (string key in parameters.MapGroupFields.Keys)
                {
                    documentQuery.Add(new TermQuery(new Term(key, parameters.MapGroupFields[key])),
                                      BooleanClause.Occur.MUST);
                }

                var td = searcher.Search(documentQuery, 1);

                // get the current Lucene docid for the given RavenDB doc ID
                if (td.ScoreDocs.Length == 0)
                {
                    context.SetStatusToNotFound();
                    context.WriteJson(new { Error = "Document " + parameters.DocumentId + " could not be found" });
                    return;
                }

                var ir  = searcher.GetIndexReader();
                var mlt = new RavenMoreLikeThis(ir);

                AssignParameters(mlt, parameters);

                if (!string.IsNullOrWhiteSpace(parameters.StopWordsDocumentId))
                {
                    var stopWordsDoc = Database.Get(parameters.StopWordsDocumentId, null);
                    if (stopWordsDoc == null)
                    {
                        context.SetStatusToNotFound();
                        context.WriteJson(
                            new
                        {
                            Error = "Stop words document " + parameters.StopWordsDocumentId + " could not be found"
                        });
                        return;
                    }
                    var stopWords = stopWordsDoc.DataAsJson.JsonDeserialization <StopWordsSetup>().StopWords;
                    // Group by the lower-cased word before building the dictionary: ToDictionary throws on
                    // duplicate keys, so a list containing e.g. both "The" and "the" would fail the request.
                    var stopWordsByLowerCase = stopWords
                                               .GroupBy(x => x.ToLower())
                                               .ToDictionary(g => g.Key, g => g.First());
                    mlt.SetStopWords(new Hashtable(stopWordsByLowerCase));
                }

                // Fall back to the field names reported for the index reader when the request names none.
                var fieldNames = parameters.Fields ?? GetFieldNames(ir);
                mlt.SetFieldNames(fieldNames);

                var toDispose = new List <Action>();
                PerFieldAnalyzerWrapper perFieldAnalyzerWrapper = null;
                try
                {
                    perFieldAnalyzerWrapper = index.CreateAnalyzer(new LowerCaseKeywordAnalyzer(), toDispose, true);
                    mlt.SetAnalyzer(perFieldAnalyzerWrapper);

                    var mltQuery = mlt.Like(td.ScoreDocs[0].doc);
                    var tsdc     = TopScoreDocCollector.create(context.GetPageSize(Database.Configuration.MaxPageSize), true);
                    searcher.Search(mltQuery, tsdc);
                    var hits          = tsdc.TopDocs().ScoreDocs;
                    var jsonDocuments = GetJsonDocuments(parameters, searcher, indexName, hits, td.ScoreDocs[0].doc);

                    var result = new MultiLoadResult();

                    // The response ETag is derived from the etags of every returned document, the index
                    // etag, and the etags of all included documents.
                    var includedEtags = new List <byte>(jsonDocuments.SelectMany(x => x.Etag.Value.ToByteArray()));
                    includedEtags.AddRange(Database.GetIndexEtag(indexName, null).ToByteArray());
                    var loadedIds          = new HashSet <string>(jsonDocuments.Select(x => x.Key));
                    var addIncludesCommand = new AddIncludesCommand(Database, GetRequestTransaction(context), (etag, includedDoc) =>
                    {
                        includedEtags.AddRange(etag.ToByteArray());
                        result.Includes.Add(includedDoc);
                    }, context.Request.QueryString.GetValues("include") ?? new string[0], loadedIds);

                    foreach (var jsonDocument in jsonDocuments)
                    {
                        result.Results.Add(jsonDocument.ToJson());
                        addIncludesCommand.Execute(jsonDocument.DataAsJson);
                    }

                    Guid computedEtag;
                    using (var md5 = MD5.Create())
                    {
                        // An MD5 digest is 16 bytes, which is exactly the size a Guid is built from.
                        var computeHash = md5.ComputeHash(includedEtags.ToArray());
                        computedEtag = new Guid(computeHash);
                    }

                    if (context.MatchEtag(computedEtag))
                    {
                        context.SetStatusToNotModified();
                        return;
                    }

                    context.Response.AddHeader("ETag", computedEtag.ToString());
                    context.WriteJson(result);
                }
                finally
                {
                    // Release the analyzer first, then run whatever CreateAnalyzer registered for disposal.
                    if (perFieldAnalyzerWrapper != null)
                    {
                        perFieldAnalyzerWrapper.Close();
                    }
                    foreach (var action in toDispose)
                    {
                        action();
                    }
                }
            }
        }