예제 #1
0
        public static MoreLikeThisQueryParameters GetParametersFromPath(string path, NameValueCollection query)
        {
            var results = new MoreLikeThisQueryParameters
            {
                IndexName = query.Get("index"),
                Fields    = query.GetValues("fields"),
                Boost     = query.Get("boost").ToNullableBool(),
                MaximumNumberOfTokensParsed = query.Get("maxNumTokens").ToNullableInt(),
                MaximumQueryTerms           = query.Get("maxQueryTerms").ToNullableInt(),
                MaximumWordLength           = query.Get("maxWordLen").ToNullableInt(),
                MinimumDocumentFrequency    = query.Get("minDocFreq").ToNullableInt(),
                MinimumTermFrequency        = query.Get("minTermFreq").ToNullableInt(),
                MinimumWordLength           = query.Get("minWordLen").ToNullableInt(),
                StopWordsDocumentId         = query.Get("stopWords"),
            };

            var keyValues = query.Get("docid").Split(new[] { ';' }, StringSplitOptions.RemoveEmptyEntries);

            foreach (var keyValue in keyValues)
            {
                var split = keyValue.IndexOf('=');

                if (split >= 0)
                {
                    results.MapGroupFields.Add(keyValue.Substring(0, split), keyValue.Substring(split + 1));
                }
                else
                {
                    results.DocumentId = keyValue;
                }
            }

            return(results);
        }
		public void Can_encode_decode_request_by_documentId()
		{
			var parameters = new MoreLikeThisQueryParameters();

			parameters.IndexName = "dataIndex";
			parameters.DocumentId = "foo/1";
			parameters.Fields = new[] { "Body" };
			parameters.MinimumWordLength = 3;
			parameters.MinimumDocumentFrequency = 1;
			parameters.Boost = true;

			var uri = parameters.GetRequestUri(parameters.IndexName);

			Assert.Equal("/morelikethis/?index=dataIndex&docid=foo%2F1&fields=Body&boost=true&minDocFreq=1&minWordLen=3&", uri);

			var path = uri.Substring(0, uri.IndexOf('?'));
			var queryString = HttpUtility.ParseQueryString(uri.Substring(uri.IndexOf('?')));
			var decodedParameters = MoreLikeThisQueryParameters.GetParametersFromPath(path, queryString);

			Assert.Equal("dataIndex", decodedParameters.IndexName);
			Assert.Equal(JsonConvert.SerializeObject(parameters), JsonConvert.SerializeObject(decodedParameters));
		}
예제 #3
0
		public static MoreLikeThisQueryParameters GetParametersFromPath(string path, NameValueCollection query)
		{
			var results = new MoreLikeThisQueryParameters
			{
				IndexName = query.Get("index"),
				Fields = query.GetValues("fields"),
				Boost = query.Get("boost").ToNullableBool(),
				MaximumNumberOfTokensParsed = query.Get("maxNumTokens").ToNullableInt(),
				MaximumQueryTerms = query.Get("maxQueryTerms").ToNullableInt(),
				MaximumWordLength = query.Get("maxWordLen").ToNullableInt(),
				MinimumDocumentFrequency = query.Get("minDocFreq").ToNullableInt(),
				MinimumTermFrequency = query.Get("minTermFreq").ToNullableInt(),
				MinimumWordLength = query.Get("minWordLen").ToNullableInt(),
				StopWordsDocumentId = query.Get("stopWords"),
			};

			var keyValues = query.Get("docid").Split(new[] { ';' }, StringSplitOptions.RemoveEmptyEntries);
			foreach(var keyValue in keyValues)
			{
				var split = keyValue.IndexOf('=');

				if (split >= 0)
				{
					results.MapGroupFields.Add(keyValue.Substring(0, split), keyValue.Substring(split+1));
				} 
				else
				{
					results.DocumentId = keyValue;
				}
			}

			return results;
		}
예제 #4
0
		private void PerformSearch(IHttpContext context, string indexName, Index index, MoreLikeThisQueryParameters parameters)
		{
			IndexSearcher searcher;
			using (Database.IndexStorage.GetCurrentIndexSearcher(indexName, out searcher))
			{
				var documentQuery = new BooleanQuery();

				if (!string.IsNullOrEmpty(parameters.DocumentId))
				{
					documentQuery.Add(new TermQuery(new Term(Constants.DocumentIdFieldName, parameters.DocumentId.ToLowerInvariant())),
									  Occur.MUST);
				}

				foreach (string key in parameters.MapGroupFields.Keys)
				{
					documentQuery.Add(new TermQuery(new Term(key, parameters.MapGroupFields[key])),
									  Occur.MUST);
				}

				var td = searcher.Search(documentQuery, 1);

				// get the current Lucene docid for the given RavenDB doc ID
				if (td.ScoreDocs.Length == 0)
				{
					context.SetStatusToNotFound();
					context.WriteJson(new { Error = "Document " + parameters.DocumentId + " could not be found" });
					return;
				}

				var ir = searcher.IndexReader;
				var mlt = new RavenMoreLikeThis(ir);

				AssignParameters(mlt, parameters);

				if (!string.IsNullOrWhiteSpace(parameters.StopWordsDocumentId))
				{
					var stopWordsDoc = Database.Get(parameters.StopWordsDocumentId, null);
					if (stopWordsDoc == null)
					{
						context.SetStatusToNotFound();
						context.WriteJson(
							new
							{
								Error = "Stop words document " + parameters.StopWordsDocumentId + " could not be found"
							});
						return;
					}
					var stopWordsSetup = stopWordsDoc.DataAsJson.JsonDeserialization<StopWordsSetup>();
					if(stopWordsSetup.StopWords != null)
					{
						var stopWords = stopWordsSetup.StopWords;
						var ht = new Hashtable(StringComparer.InvariantCultureIgnoreCase);
						foreach (var stopWord in stopWords)
						{
							ht[stopWord] = stopWord;
						}
						mlt.SetStopWords(ht);	
					}
				}

				var fieldNames = parameters.Fields ?? GetFieldNames(ir);
				mlt.SetFieldNames(fieldNames);

				var toDispose = new List<Action>();
				PerFieldAnalyzerWrapper perFieldAnalyzerWrapper = null;
				try
				{
					perFieldAnalyzerWrapper = index.CreateAnalyzer(new LowerCaseKeywordAnalyzer(), toDispose, true);
					mlt.SetAnalyzer(perFieldAnalyzerWrapper);

					var mltQuery = mlt.Like(td.ScoreDocs[0].Doc);
					var tsdc = TopScoreDocCollector.Create(context.GetPageSize(Database.Configuration.MaxPageSize), true);
					searcher.Search(mltQuery, tsdc);
					var hits = tsdc.TopDocs().ScoreDocs;
					var jsonDocuments = GetJsonDocuments(parameters, searcher, index, indexName, hits, td.ScoreDocs[0].Doc);

					var result = new MultiLoadResult();

					var includedEtags = new List<byte>(jsonDocuments.SelectMany(x => x.Etag.Value.ToByteArray()));
					includedEtags.AddRange(Database.GetIndexEtag(indexName, null).ToByteArray());
					var loadedIds = new HashSet<string>(jsonDocuments.Select(x => x.Key));
					var addIncludesCommand = new AddIncludesCommand(Database, GetRequestTransaction(context), (etag, includedDoc) =>
					{
						includedEtags.AddRange(etag.ToByteArray());
						result.Includes.Add(includedDoc);
					}, context.Request.QueryString.GetValues("include") ?? new string[0], loadedIds);

					foreach (var jsonDocumet in jsonDocuments)
					{
						result.Results.Add(jsonDocumet.ToJson());
						addIncludesCommand.Execute(jsonDocumet.DataAsJson);
					}

					Guid computedEtag;
					using (var md5 = MD5.Create())
					{
						var computeHash = md5.ComputeHash(includedEtags.ToArray());
						computedEtag = new Guid(computeHash);
					}

					if (context.MatchEtag(computedEtag))
					{
						context.SetStatusToNotModified();
						return;
					}

					context.Response.AddHeader("ETag", computedEtag.ToString());
					context.WriteJson(result);
				}
				finally
				{
					if (perFieldAnalyzerWrapper != null)
						perFieldAnalyzerWrapper.Close();
					foreach (var action in toDispose)
					{
						action();
					}
				}
			}
		}
예제 #5
0
		private static void AssignParameters(Similarity.Net.MoreLikeThis mlt, MoreLikeThisQueryParameters parameters)
		{
			if (parameters.Boost != null) mlt.SetBoost(parameters.Boost.Value);
			if (parameters.MaximumNumberOfTokensParsed != null)
				mlt.SetMaxNumTokensParsed(parameters.MaximumNumberOfTokensParsed.Value);
			if (parameters.MaximumNumberOfTokensParsed != null) mlt.SetMaxNumTokensParsed(parameters.MaximumNumberOfTokensParsed.Value);
			if (parameters.MaximumQueryTerms != null) mlt.SetMaxQueryTerms(parameters.MaximumQueryTerms.Value);
			if (parameters.MaximumWordLength != null) mlt.SetMaxWordLen(parameters.MaximumWordLength.Value);
			if (parameters.MinimumDocumentFrequency != null) mlt.SetMinDocFreq(parameters.MinimumDocumentFrequency.Value);
			if (parameters.MinimumTermFrequency != null) mlt.SetMinTermFreq(parameters.MinimumTermFrequency.Value);
			if (parameters.MinimumWordLength != null) mlt.SetMinWordLen(parameters.MinimumWordLength.Value);
		}
예제 #6
0
		private IEnumerable<JsonDocument> GetJsonDocuments(
			MoreLikeThisQueryParameters parameters, IndexSearcher searcher, Index index,
			string indexName, IEnumerable<ScoreDoc> hits, int baseDocId)
		{
			if (string.IsNullOrEmpty(parameters.DocumentId) == false)
			{
				var documentIds = hits
					.Where(hit => hit.Doc != baseDocId)
					.Select(hit => searcher.Doc(hit.Doc).Get(Constants.DocumentIdFieldName))
					.Where(x => x != null)
					.Distinct();

				return documentIds
					.Select(docId => Database.Get(docId, null))
					.Where(it => it != null)
					.ToArray();
			}

			var fields = searcher.Doc(baseDocId).GetFields().Cast<AbstractField>().Select(x => x.Name).Distinct().ToArray();
			var etag = Database.GetIndexEtag(indexName, null);
			return hits
				.Where(hit => hit.Doc != baseDocId)
				.Select(hit => new JsonDocument
				{
					DataAsJson = Index.CreateDocumentFromFields(searcher.Doc(hit.Doc), new FieldsToFetch(fields, AggregationOperation.None,
						index.IsMapReduce ? Constants.ReduceKeyFieldName : Constants.DocumentIdFieldName)),
					Etag = etag
				})
				.ToArray();

		}