/// <summary>
/// Builds a <see cref="MoreLikeThisQuery"/> from the query-string values of a more-like-this request.
/// </summary>
/// <param name="path">Request path. It is not read by this method.</param>
/// <param name="query">
/// Parsed query string. Recognised keys: index, fields, boost, maxNumTokens, maxQueryTerms,
/// maxWordLen, minDocFreq, minTermFreq, minWordLen, stopWords and docid.
/// </param>
/// <returns>The populated query object.</returns>
/// <remarks>
/// The "docid" value is split on ';'. Each "key=value" segment is added to
/// <c>MapGroupFields</c>; a segment without '=' is taken as the document id.
/// A missing "docid" key leaves both untouched instead of throwing.
/// </remarks>
public static MoreLikeThisQuery GetParametersFromPath(string path, NameValueCollection query)
{
    var results = new MoreLikeThisQuery
    {
        IndexName = query.Get("index"),
        Fields = query.GetValues("fields"),
        Boost = query.Get("boost").ToNullableBool(),
        MaximumNumberOfTokensParsed = query.Get("maxNumTokens").ToNullableInt(),
        MaximumQueryTerms = query.Get("maxQueryTerms").ToNullableInt(),
        MaximumWordLength = query.Get("maxWordLen").ToNullableInt(),
        MinimumDocumentFrequency = query.Get("minDocFreq").ToNullableInt(),
        MinimumTermFrequency = query.Get("minTermFreq").ToNullableInt(),
        MinimumWordLength = query.Get("minWordLen").ToNullableInt(),
        StopWordsDocumentId = query.Get("stopWords"),
    };

    // NameValueCollection.Get returns null for an absent key; treat that as "no docid given"
    // so the caller gets an empty query rather than a NullReferenceException from Split.
    var docId = query.Get("docid") ?? string.Empty;
    var keyValues = docId.Split(new[] { ';' }, StringSplitOptions.RemoveEmptyEntries);

    foreach (var keyValue in keyValues)
    {
        var split = keyValue.IndexOf('=');
        if (split >= 0)
        {
            results.MapGroupFields.Add(keyValue.Substring(0, split), keyValue.Substring(split + 1));
        }
        else
        {
            results.DocumentId = keyValue;
        }
    }

    return results;
}
/// <summary>
/// Runs the given more-like-this query against the database and returns the loaded documents.
/// </summary>
/// <param name="query">The more-like-this query parameters.</param>
/// <returns>The <c>Result</c> part of the executed query.</returns>
public MultiLoadResult MoreLikeThis(MoreLikeThisQuery query)
{
    // Publish this command's headers to the current operation context before executing.
    CurrentOperationContext.Headers.Value = OperationsHeaders;

    return database.ExecuteMoreLikeThisQuery(query, TransactionInformation).Result;
}
/// <summary>
/// Copies every option that is set on <paramref name="parameters"/> onto the
/// more-like-this instance. Options left as <c>null</c> are not applied.
/// </summary>
/// <param name="mlt">The more-like-this instance to configure.</param>
/// <param name="parameters">The query whose non-null options are applied.</param>
private static void AssignParameters(Similarity.Net.MoreLikeThis mlt, MoreLikeThisQuery parameters)
{
    if (parameters.Boost != null)
    {
        mlt.SetBoost(parameters.Boost.Value);
    }

    // The original applied this option twice in a row (copy/paste duplicate); once is enough.
    if (parameters.MaximumNumberOfTokensParsed != null)
    {
        mlt.SetMaxNumTokensParsed(parameters.MaximumNumberOfTokensParsed.Value);
    }

    if (parameters.MaximumQueryTerms != null)
    {
        mlt.SetMaxQueryTerms(parameters.MaximumQueryTerms.Value);
    }

    if (parameters.MaximumWordLength != null)
    {
        mlt.SetMaxWordLen(parameters.MaximumWordLength.Value);
    }

    if (parameters.MinimumDocumentFrequency != null)
    {
        mlt.SetMinDocFreq(parameters.MinimumDocumentFrequency.Value);
    }

    if (parameters.MinimumTermFrequency != null)
    {
        mlt.SetMinTermFreq(parameters.MinimumTermFrequency.Value);
    }

    if (parameters.MinimumWordLength != null)
    {
        mlt.SetMinWordLen(parameters.MinimumWordLength.Value);
    }
}
/// <summary>
/// Serializes a <see cref="MoreLikeThisQuery"/> as a <c>more_like_this</c> JSON object.
/// </summary>
/// <param name="writer">Writer receiving the JSON tokens.</param>
/// <param name="value">The query to write; nothing is written when it is null.</param>
/// <param name="serializer">Not used by this converter.</param>
public override void WriteJson(JsonWriter writer, object value, JsonSerializer serializer)
{
    // Values that are skipped in the output when the query still carries them.
    const int omittedMinTermFreq = 2;
    const int omittedMaxQueryTerms = 25;

    if (value == null)
    {
        return;
    }

    var mltQuery = (MoreLikeThisQuery)value;

    writer.WriteStartObject();
    writer.WritePropertyName("more_like_this");
    writer.WriteStartObject();

    writer.WritePropertyName("fields");
    writer.WriteStartArray();
    foreach (var fieldName in mltQuery.Fields)
    {
        writer.WriteValue(fieldName);
    }
    writer.WriteEndArray();

    writer.WritePropertyName("like_text");
    writer.WriteValue(mltQuery.LikeText);

    if (mltQuery.MinTermFreq != omittedMinTermFreq)
    {
        writer.WritePropertyName("min_term_freq");
        writer.WriteValue(mltQuery.MinTermFreq);
    }

    if (mltQuery.MaxQueryTerm != omittedMaxQueryTerms)
    {
        writer.WritePropertyName("max_query_terms");
        writer.WriteValue(mltQuery.MaxQueryTerm);
    }

    writer.WriteEndObject();
    writer.WriteEndObject();
}
/// <summary>
/// Round-trips a query that identifies its source by map-group fields:
/// encode to a request URI, decode it again, and compare the serialized forms.
/// </summary>
public void CanEncodeDecodeRequestOnIndexGrouping()
{
    var original = new MoreLikeThisQuery
    {
        IndexName = "dataIndex",
        Fields = new[] { "Body" },
        MinimumWordLength = 3,
        MinimumDocumentFrequency = 1,
        Boost = true,
    };
    original.MapGroupFields.Add("foo", "bar");
    original.MapGroupFields.Add("be", "bop");

    var requestUri = original.GetRequestUri();
    Assert.Equal("/morelikethis/?index=dataIndex&docid=foo%3Dbar%3Bbe%3Dbop&fields=Body&boost=true&minDocFreq=1&minWordLen=3&", requestUri);

    // Split the URI at the first '?' into path and query string.
    var queryStart = requestUri.IndexOf('?');
    var requestPath = requestUri.Substring(0, queryStart);
    var parsedQuery = HttpUtility.ParseQueryString(requestUri.Substring(queryStart));

    var roundTripped = MoreLikeThisResponder.GetParametersFromPath(requestPath, parsedQuery);

    Assert.Equal("dataIndex", roundTripped.IndexName);
    Assert.Equal(JsonConvert.SerializeObject(original), JsonConvert.SerializeObject(roundTripped));
}
/// <summary>
/// Registers a lazy, asynchronous more-like-this operation on this session.
/// </summary>
/// <typeparam name="TResult">Type of the returned entities.</typeparam>
/// <param name="query">The more-like-this query parameters.</param>
/// <param name="token">Cancellation token handed to the lazy operation registration.</param>
/// <returns>A lazy handle for the task producing the matching entities.</returns>
public Lazy<Task<TResult[]>> MoreLikeThisAsync<TResult>(MoreLikeThisQuery query, CancellationToken token = default(CancellationToken))
{
    var loadOperation = new MultiLoadOperation(this, AsyncDatabaseCommands.DisableAllCaching, null, null);
    var lazyMoreLikeThis = new LazyMoreLikeThisOperation<TResult>(loadOperation, query);

    return AddLazyOperation<TResult[]>(lazyMoreLikeThis, null, token);
}
/// <summary>
/// Registers a lazy more-like-this operation on this session.
/// </summary>
/// <typeparam name="TResult">Type of the returned entities.</typeparam>
/// <param name="query">The more-like-this query parameters.</param>
/// <returns>A lazy handle for the list of matching entities.</returns>
Lazy<List<TResult>> ILazySessionOperations.MoreLikeThis<TResult>(MoreLikeThisQuery query)
{
    // TODO - DisableAllCaching
    var lazyMoreLikeThis = new LazyMoreLikeThisOperation<TResult>(this, query);

    return AddLazyOperation<List<TResult>>(lazyMoreLikeThis, null);
}
/// <summary>
/// Turns the search hits into documents, excluding the hit for the source document itself.
/// </summary>
/// <param name="parameters">The query; its <c>DocumentId</c> selects how results are built.</param>
/// <param name="searcher">Searcher used to read the stored fields of each hit.</param>
/// <param name="index">Index instance; its <c>IsMapReduce</c> flag picks the key field.</param>
/// <param name="indexName">Name used to look up the index etag.</param>
/// <param name="hits">Scored hits of the more-like-this search.</param>
/// <param name="baseDocId">Lucene doc id of the source document; filtered out of the results.</param>
/// <returns>
/// With a document id: the stored documents loaded by the ids found in the hits.
/// Without one: documents created from the index entries' fields, stamped with the index etag.
/// </returns>
private JsonDocument[] GetJsonDocuments(MoreLikeThisQuery parameters, IndexSearcher searcher, Index index, string indexName, IEnumerable<ScoreDoc> hits, int baseDocId)
{
    var similarHits = hits.Where(hit => hit.Doc != baseDocId);

    if (string.IsNullOrEmpty(parameters.DocumentId))
    {
        // No document id: project the index entries, using the source entry's field names.
        var fieldNames = searcher.Doc(baseDocId)
            .GetFields()
            .Cast<AbstractField>()
            .Select(x => x.Name)
            .Distinct()
            .ToArray();
        var indexEtag = database.Indexes.GetIndexEtag(indexName, null);

        return similarHits
            .Select(hit => new JsonDocument
            {
                DataAsJson = Index.CreateDocumentFromFields(
                    searcher.Doc(hit.Doc),
                    new FieldsToFetch(fieldNames, false, index.IsMapReduce ? Constants.ReduceKeyFieldName : Constants.DocumentIdFieldName)),
                Etag = indexEtag
            })
            .ToArray();
    }

    // Document id given: resolve each hit to its document id and load the stored documents.
    return similarHits
        .Select(hit => searcher.Doc(hit.Doc).Get(Constants.DocumentIdFieldName))
        .Where(id => id != null)
        .Distinct()
        .Select(id => database.Documents.Get(id, null))
        .Where(loaded => loaded != null)
        .ToArray();
}
/// <summary>
/// Registers a lazy more-like-this operation on this session.
/// </summary>
/// <typeparam name="TResult">Type of the returned entities.</typeparam>
/// <param name="query">The more-like-this query parameters.</param>
/// <returns>A lazy handle for the matching entities.</returns>
public Lazy<TResult[]> MoreLikeThis<TResult>(MoreLikeThisQuery query)
{
    var loadOperation = new MultiLoadOperation(this, DatabaseCommands.DisableAllCaching, null, null);
    var lazyMoreLikeThis = new LazyMoreLikeThisOperation<TResult>(loadOperation, query);

    return AddLazyOperation<TResult[]>(lazyMoreLikeThis, null);
}
/// <summary>
/// Creates the operation for the given session and query.
/// </summary>
/// <param name="session">Owning session; must not be null.</param>
/// <param name="query">The more-like-this query; it and its <c>Query</c> must not be null.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="session"/>, <paramref name="query"/> or <c>query.Query</c> is null.
/// </exception>
public MoreLikeThisOperation(InMemoryDocumentSessionOperations session, MoreLikeThisQuery query)
{
    if (session == null)
    {
        throw new ArgumentNullException(nameof(session));
    }

    if (query == null)
    {
        throw new ArgumentNullException(nameof(query));
    }

    if (query.Query == null)
    {
        throw new ArgumentNullException(nameof(query.Query));
    }

    _session = session;
    _query = query;
}
/// <summary>
/// Runs the generic <c>QueryUtils.Check</c> checks against a <see cref="MoreLikeThisQuery"/>.
/// </summary>
public void TestMoreLikeThisQuery()
{
    var fieldNames = new[] { "text" };
    Query mltQuery = new MoreLikeThisQuery("this is a test", fieldNames, new MockAnalyzer(Random), "text");

    // The test instance is passed as an extra leading argument only when the feature symbol is defined.
    QueryUtils.Check(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, mltQuery, searcher);
}
/// <summary>
/// Creates the lazy wrapper around a <see cref="MoreLikeThisOperation"/> for the given session.
/// </summary>
/// <param name="session">Owning session; supplies the conventions. Must not be null.</param>
/// <param name="query">The more-like-this query. Must not be null.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="session"/> or <paramref name="query"/> is null.
/// </exception>
public LazyMoreLikeThisOperation(InMemoryDocumentSessionOperations session, MoreLikeThisQuery query)
{
    if (session == null)
    {
        throw new ArgumentNullException(nameof(session));
    }

    if (query == null)
    {
        throw new ArgumentNullException(nameof(query));
    }

    _query = query;
    _conventions = session.Conventions;
    _operation = new MoreLikeThisOperation(session, query);
}
/// <summary>
/// Runs a more-like-this query against the index defined by <typeparamref name="TIndexCreator"/>.
/// </summary>
/// <typeparam name="T">Type of the returned entities.</typeparam>
/// <typeparam name="TIndexCreator">Index creation task whose <c>IndexName</c> is queried.</typeparam>
/// <param name="query">The more-like-this query; its <c>Query</c> is overwritten here.</param>
/// <returns>The matching entities.</returns>
/// <exception cref="ArgumentNullException"><paramref name="query"/> is null.</exception>
public List<T> MoreLikeThis<T, TIndexCreator>(MoreLikeThisQuery query) where TIndexCreator : AbstractIndexCreationTask, new()
{
    if (query == null)
    {
        throw new ArgumentNullException(nameof(query));
    }

    // An index creator instance is needed only to read its index name.
    var indexName = new TIndexCreator().IndexName;
    query.Query = CreateQuery(indexName);

    return MoreLikeThis<T>(query);
}
/// <summary>
/// Builds a more-like-this query that matches <paramref name="documentText"/> against
/// <paramref name="field"/>, with a minimum-should-match of 1 and a small fixed stop-word list.
/// </summary>
/// <param name="field">The field to compare on.</param>
/// <param name="documentText">Free text used as the "like" input.</param>
/// <returns>The configured query.</returns>
// Earlier variant kept for reference:
//   private MoreLikeThisQuery GetMoreLikeThisQuery(Field field, Document document)
private MoreLikeThisQuery GetMoreLikeThisQuery(Field field, string documentText)
{
    var likeClauses = new List<Like>
    {
        // new LikeDocument<Document>(document.Id),
        documentText
    };
    var stopWords = new[] { "and", "the", "as", "a" };

    return new MoreLikeThisQuery
    {
        // Boost = 1.1,
        Fields = field,
        Like = likeClauses,
        MinimumShouldMatch = 1,
        StopWords = stopWords
    };
}
/// <summary>
/// Executes a more-like-this query asynchronously and returns the matching entities.
/// </summary>
/// <typeparam name="T">Type of the returned entities.</typeparam>
/// <param name="query">The more-like-this query. Must not be null.</param>
/// <returns>A task producing the matching entities.</returns>
/// <exception cref="ArgumentNullException"><paramref name="query"/> is null.</exception>
public async Task<List<T>> MoreLikeThisAsync<T>(MoreLikeThisQuery query)
{
    if (query == null)
    {
        throw new ArgumentNullException(nameof(query));
    }

    var moreLikeThis = new MoreLikeThisOperation(this, query);
    var request = moreLikeThis.CreateRequest();

    await RequestExecutor.ExecuteAsync(request, Context, sessionInfo: SessionInfo).ConfigureAwait(false);

    // The executed request carries the server response; feed it back to the operation.
    moreLikeThis.SetResult(request.Result);
    return moreLikeThis.Complete<T>();
}
/// <summary>
/// Searches for documents similar to <paramref name="searchPhrase"/> using a
/// <see cref="MoreLikeThisQuery"/> and maps each hit to a <see cref="SearchResult"/>.
/// </summary>
/// <param name="searcher">Searcher to run the query on.</param>
/// <param name="searchPhrase">Text to find similar documents for.</param>
/// <param name="fields">Fields the query compares on.</param>
/// <param name="TopHits">Maximum number of hits requested from the searcher.</param>
/// <returns>One result per hit, built from its "title", "description" and "_group" fields.</returns>
private List<SearchResult> ShowSimilarResultsUsingMLTQuery(IndexSearcher searcher, string searchPhrase, string[] fields, int TopHits)
{
    var analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);
    var similarityQuery = new MoreLikeThisQuery(searchPhrase, fields, analyzer)
    {
        MinDocFreq = 1,
        MinTermFrequency = 1
    };

    var hits = searcher.Search(similarityQuery, TopHits).ScoreDocs;
    var similar = new List<SearchResult>();

    for (var i = 0; i < hits.Length; i++)
    {
        var stored = searcher.Doc(hits[i].Doc);
        similar.Add(new SearchResult(stored.Get("title"), stored.Get("description"), stored.Get("_group")));
    }

    return similar;
}
/// <summary>
/// Executes a more-like-this query synchronously and returns the matching entities.
/// </summary>
/// <typeparam name="T">Type of the returned entities.</typeparam>
/// <param name="query">The more-like-this query. Must not be null.</param>
/// <returns>The matching entities.</returns>
/// <exception cref="ArgumentNullException"><paramref name="query"/> is null.</exception>
public List<T> MoreLikeThis<T>(MoreLikeThisQuery query)
{
    if (query == null)
    {
        throw new ArgumentNullException(nameof(query));
    }

    var moreLikeThis = new MoreLikeThisOperation(this, query);
    var request = moreLikeThis.CreateRequest();

    RequestExecutor.Execute(request, Context, sessionInfo: SessionInfo);

    // The executed request carries the server response; feed it back to the operation.
    moreLikeThis.SetResult(request.Result);
    return moreLikeThis.Complete<T>();
}
/// <summary>
/// Converts the documents to JSON objects, running them through the query's results
/// transformer when one is named.
/// </summary>
/// <param name="query">The query; <c>ResultsTransformer</c> names the optional transformer.</param>
/// <param name="documents">Documents to convert.</param>
/// <param name="token">Cancellation token passed to the robust enumerator.</param>
/// <returns>A lazily evaluated sequence of result objects.</returns>
/// <exception cref="InvalidOperationException">
/// A transformer is named but it, or its transform definition, cannot be found.
/// </exception>
private IEnumerable<RavenJObject> ProcessResults(MoreLikeThisQuery query, IEnumerable<JsonDocument> documents, CancellationToken token)
{
    // No transformer requested: plain JSON projection.
    if (string.IsNullOrEmpty(query.ResultsTransformer))
    {
        return documents.Select(x => x.ToJson());
    }

    var transformer = database.IndexDefinitionStorage.GetTransformer(query.ResultsTransformer);
    if (transformer == null || transformer.TransformResultsDefinition == null)
    {
        throw new InvalidOperationException("The transformer " + query.ResultsTransformer + " was not found");
    }

    IndexingFunc transform = transformer.TransformResultsDefinition;

    // Per-document transformer failures are recorded here; this method itself never reads the list.
    var transformerErrors = new List<string>();
    var robustEnumerator = new RobustEnumerator(token, 100)
    {
        OnError = (exception, o) => transformerErrors.Add(string.Format("Doc '{0}', Error: {1}", Index.TryGetDocKey(o), exception.Message))
    };

    var dynamicDocuments = documents.Select(x => new DynamicJsonObject(x.ToJson())).GetEnumerator();

    return robustEnumerator
        .RobustEnumeration(dynamicDocuments, transform)
        .Select(JsonExtensions.ToJObject);
}
/// <summary>
/// Builds the Elasticsearch More Like This query for one article.
/// </summary>
/// <param name="articleId">Id of the article the query is generated for.</param>
/// <returns>
/// A query whose "like" clause points at the article in the "news" index (type "_doc"),
/// with the minimum document frequency set to 1.
/// </returns>
private MoreLikeThisQuery GetElasticMoreLikeThisQuery(string articleId)
{
    var likeArticle = new Like()
    {
        Index = "news",
        Type = "_doc",
        Id = articleId
    };

    var moreLikeThis = new MoreLikeThis()
    {
        Like = likeArticle,
        MinimumDocumentFrequence = 1
    };

    var queryBody = new Models.Elasticsearch.MoreLikeThis.Query()
    {
        MoreLikeThis = moreLikeThis
    };

    return new MoreLikeThisQuery()
    {
        Query = queryBody
    };
}
/// <summary>
/// Round-trips a query that identifies its source by document id:
/// encode to a request URI, decode it again, and compare the serialized forms.
/// </summary>
public void CanEncodeDecodeRequestByDocumentId()
{
    var original = new MoreLikeThisQuery
    {
        IndexName = "dataIndex",
        DocumentId = "foo/1",
        Fields = new[] { "Body" },
        MinimumWordLength = 3,
        MinimumDocumentFrequency = 1,
        Boost = true,
    };

    var requestUri = original.GetRequestUri();
    Assert.Equal("/morelikethis/?index=dataIndex&docid=foo%2F1&fields=Body&boost=true&minDocFreq=1&minWordLen=3&", requestUri);

    // Split the URI at the first '?' into path and query string.
    var queryStart = requestUri.IndexOf('?');
    var requestPath = requestUri.Substring(0, queryStart);
    var parsedQuery = HttpUtility.ParseQueryString(requestUri.Substring(queryStart));

    var roundTripped = MoreLikeThisController.GetParametersFromPath(requestPath, parsedQuery);

    Assert.Equal("dataIndex", roundTripped.IndexName);
    Assert.Equal(JsonConvert.SerializeObject(original), JsonConvert.SerializeObject(roundTripped));
}
/// <summary>
/// Lazy more-like-this is not available here; this method always throws.
/// </summary>
/// <typeparam name="TResult">Type of the entities that would be returned.</typeparam>
/// <param name="query">Ignored.</param>
/// <exception cref="NotSupportedException">Always thrown.</exception>
public Lazy<TResult[]> MoreLikeThis<TResult>(MoreLikeThisQuery query)
{
    const string reason = "Not supported for sharded session";
    throw new NotSupportedException(reason);
}
/// <summary>
/// Lazy asynchronous more-like-this is not implemented here; this method always throws.
/// </summary>
/// <typeparam name="TResult">Type of the entities that would be returned.</typeparam>
/// <param name="query">Ignored.</param>
/// <param name="token">Ignored.</param>
/// <exception cref="NotImplementedException">Always thrown.</exception>
public Lazy<Task<TResult[]>> MoreLikeThisAsync<TResult>(MoreLikeThisQuery query, CancellationToken token = default(CancellationToken))
{
    var notImplemented = new NotImplementedException();
    throw notImplemented;
}
/// <summary>
/// Runs a more-like-this query against the named index and returns the matching entities.
/// </summary>
/// <typeparam name="T">Type of the returned entities.</typeparam>
/// <param name="advancedSession">The session the query is executed through.</param>
/// <param name="index">Name of the index to query; must not be null or empty.</param>
/// <param name="parameters">The query parameters; <c>IndexName</c> is overwritten with <paramref name="index"/>.</param>
/// <returns>The matching entities.</returns>
/// <exception cref="ArgumentException"><paramref name="index"/> is null or empty.</exception>
/// <exception cref="ArgumentNullException">
/// <paramref name="advancedSession"/> or <paramref name="parameters"/> is null.
/// </exception>
public static T[] MoreLikeThis<T>(this ISyncAdvancedSessionOperation advancedSession, string index, MoreLikeThisQuery parameters)
{
    if (string.IsNullOrEmpty(index))
    {
        throw new ArgumentException("Index name cannot be null or empty", "index");
    }

    // Fail with a descriptive argument exception instead of a NullReferenceException further down.
    if (advancedSession == null)
    {
        throw new ArgumentNullException("advancedSession");
    }

    if (parameters == null)
    {
        throw new ArgumentNullException("parameters");
    }

    parameters.IndexName = index;

    // /morelikethis/(index-name)/(ravendb-document-id)?fields=(fields)
    var cmd = advancedSession.DocumentStore.DatabaseCommands;

    var inMemoryDocumentSessionOperations = ((InMemoryDocumentSessionOperations)advancedSession);
    inMemoryDocumentSessionOperations.IncrementRequestCount();

    var multiLoadOperation = new MultiLoadOperation(inMemoryDocumentSessionOperations, cmd.DisableAllCaching, null, null);
    MultiLoadResult multiLoadResult;

    // Repeat the request for as long as SetResult returns true.
    do
    {
        multiLoadOperation.LogOperation();
        using (multiLoadOperation.EnterMultiLoadContext())
        {
            multiLoadResult = cmd.MoreLikeThis(parameters);
        }
    } while (multiLoadOperation.SetResult(multiLoadResult));

    return multiLoadOperation.Complete<T>();
}
/// <summary>
/// Applies every option that has a value on <paramref name="parameters"/> to the Lucene
/// more-like-this instance. Options without a value are not applied.
/// </summary>
/// <param name="mlt">The more-like-this instance to configure.</param>
/// <param name="parameters">The query supplying the optional settings.</param>
private static void AssignParameters(Lucene.Net.Search.Similar.MoreLikeThis mlt, MoreLikeThisQuery parameters)
{
    // Boosting
    if (parameters.Boost.HasValue)
    {
        mlt.Boost = parameters.Boost.Value;
    }

    if (parameters.BoostFactor.HasValue)
    {
        mlt.BoostFactor = parameters.BoostFactor.Value;
    }

    // Parsing and term limits
    if (parameters.MaximumNumberOfTokensParsed.HasValue)
    {
        mlt.MaxNumTokensParsed = parameters.MaximumNumberOfTokensParsed.Value;
    }

    if (parameters.MaximumQueryTerms.HasValue)
    {
        mlt.MaxQueryTerms = parameters.MaximumQueryTerms.Value;
    }

    // Word length bounds
    if (parameters.MinimumWordLength.HasValue)
    {
        mlt.MinWordLen = parameters.MinimumWordLength.Value;
    }

    if (parameters.MaximumWordLength.HasValue)
    {
        mlt.MaxWordLen = parameters.MaximumWordLength.Value;
    }

    // Frequency bounds
    if (parameters.MinimumTermFrequency.HasValue)
    {
        mlt.MinTermFreq = parameters.MinimumTermFrequency.Value;
    }

    if (parameters.MinimumDocumentFrequency.HasValue)
    {
        mlt.MinDocFreq = parameters.MinimumDocumentFrequency.Value;
    }

    if (parameters.MaximumDocumentFrequency.HasValue)
    {
        mlt.MaxDocFreq = parameters.MaximumDocumentFrequency.Value;
    }

    if (parameters.MaximumDocumentFrequencyPercentage.HasValue)
    {
        mlt.SetMaxDocFreqPct(parameters.MaximumDocumentFrequencyPercentage.Value);
    }
}
/// <summary>
/// Executes a more-like-this query: locates the source index entry, builds a Lucene
/// more-like-this query from it, runs the search, and returns the resulting documents
/// (plus includes) together with an etag computed over everything that was returned.
/// </summary>
/// <param name="query">The more-like-this parameters; must not be null.</param>
/// <param name="transactionInformation">Passed on to the includes command.</param>
/// <param name="pageSize">Number of hits requested from the score collector.</param>
/// <returns>The results, their includes, and the computed etag.</returns>
/// <exception cref="ArgumentNullException"><paramref name="query"/> is null.</exception>
/// <exception cref="InvalidOperationException">
/// The index is not found, neither a document id nor map group fields are given,
/// the source document is not found in the index, or the stop words document is not found.
/// </exception>
public MoreLikeThisQueryResult ExecuteMoreLikeThisQuery(MoreLikeThisQuery query, TransactionInformation transactionInformation, int pageSize = 25)
{
    if (query == null)
    {
        throw new ArgumentNullException("query");
    }
    var index = database.IndexStorage.GetIndexInstance(query.IndexName);
    if (index == null)
    {
        throw new InvalidOperationException("The index " + query.IndexName + " cannot be found");
    }
    // The source entry is identified either by document id or by map group fields; one is required.
    if (string.IsNullOrEmpty(query.DocumentId) && query.MapGroupFields.Count == 0)
    {
        throw new InvalidOperationException("The document id or map group fields are mandatory");
    }
    IndexSearcher searcher;
    using (database.IndexStorage.GetCurrentIndexSearcher(index.indexId, out searcher))
    {
        // Build a query that must match the document id (lower-cased) and every map group field.
        var documentQuery = new BooleanQuery();
        if (string.IsNullOrEmpty(query.DocumentId) == false)
        {
            documentQuery.Add(new TermQuery(new Term(Constants.DocumentIdFieldName, query.DocumentId.ToLowerInvariant())), Occur.MUST);
        }
        foreach (string key in query.MapGroupFields.Keys)
        {
            documentQuery.Add(new TermQuery(new Term(key, query.MapGroupFields[key])), Occur.MUST);
        }
        var td = searcher.Search(documentQuery, 1); // get the current Lucene docid for the given RavenDB doc ID
        if (td.ScoreDocs.Length == 0)
        {
            // NOTE(review): when the lookup used only map group fields, DocumentId is empty in this message.
            throw new InvalidOperationException("Document " + query.DocumentId + " could not be found");
        }
        var ir = searcher.IndexReader;
        var mlt = new RavenMoreLikeThis(ir);
        AssignParameters(mlt, query);
        // Optional stop words are read from a separate document.
        if (string.IsNullOrWhiteSpace(query.StopWordsDocumentId) == false)
        {
            var stopWordsDoc = database.Documents.Get(query.StopWordsDocumentId, null);
            if (stopWordsDoc == null)
            {
                throw new InvalidOperationException("Stop words document " + query.StopWordsDocumentId + " could not be found");
            }
            var stopWordsSetup = stopWordsDoc.DataAsJson.JsonDeserialization <StopWordsSetup>();
            if (stopWordsSetup.StopWords != null)
            {
                var stopWords = stopWordsSetup.StopWords;
                // Stop words are collected into a case-insensitive set.
                var ht = new HashSet <string>(StringComparer.InvariantCultureIgnoreCase);
                foreach (var stopWord in stopWords)
                {
                    ht.Add(stopWord);
                }
                mlt.SetStopWords(ht);
            }
        }
        // Fall back to the field names obtained from the index reader when the query names none.
        var fieldNames = query.Fields ?? GetFieldNames(ir);
        mlt.SetFieldNames(fieldNames);
        // Filled by CreateAnalyzer; every action in it is invoked in the finally block below.
        var toDispose = new List <Action>();
        RavenPerFieldAnalyzerWrapper perFieldAnalyzerWrapper = null;
        try
        {
            perFieldAnalyzerWrapper = index.CreateAnalyzer(new LowerCaseKeywordAnalyzer(), toDispose, true);
            mlt.Analyzer = perFieldAnalyzerWrapper;
            var mltQuery = mlt.Like(td.ScoreDocs[0].Doc);
            var tsdc = TopScoreDocCollector.Create(pageSize, true);
            // An additional query, when given, must match as well as the more-like-this query.
            if (string.IsNullOrWhiteSpace(query.AdditionalQuery) == false)
            {
                var additionalQuery = QueryBuilder.BuildQuery(query.AdditionalQuery, perFieldAnalyzerWrapper);
                mltQuery = new BooleanQuery
                {
                    { mltQuery, Occur.MUST },
                    { additionalQuery, Occur.MUST },
                };
            }
            searcher.Search(mltQuery, tsdc);
            var hits = tsdc.TopDocs().ScoreDocs;
            var jsonDocuments = GetJsonDocuments(query, searcher, index, query.IndexName, hits, td.ScoreDocs[0].Doc);
            var result = new MultiLoadResult();
            // The etag input is the etag bytes of every result document, the index etag, and every included document.
            var includedEtags = new List <byte>(jsonDocuments.SelectMany(x => x.Etag.ToByteArray()));
            includedEtags.AddRange(database.Indexes.GetIndexEtag(query.IndexName, null).ToByteArray());
            var loadedIds = new HashSet <string>(jsonDocuments.Select(x => x.Key));
            var addIncludesCommand = new AddIncludesCommand(database, transactionInformation, (etag, includedDoc) =>
            {
                includedEtags.AddRange(etag.ToByteArray());
                result.Includes.Add(includedDoc);
            }, query.Includes ?? new string[0], loadedIds);
            // NOTE(review): idsToLoad and documentRetriever are not declared in this method,
            // presumably members of the enclosing class; confirm before using this method concurrently.
            idsToLoad = new HashSet <string>();
            database.TransactionalStorage.Batch(actions =>
            {
                documentRetriever = new DocumentRetriever(database.Configuration, actions, database.ReadTriggers, query.TransformerParameters, idsToLoad);
                using (new CurrentTransformationScope(database, documentRetriever))
                {
                    // Each processed result is added to the output and scanned for includes.
                    foreach (var document in ProcessResults(query, jsonDocuments, database.WorkContext.CancellationToken))
                    {
                        result.Results.Add(document);
                        addIncludesCommand.Execute(document);
                    }
                }
            });
            addIncludesCommand.AlsoInclude(idsToLoad);
            // Hash the collected etag bytes into a single etag for the whole response.
            var computeHash = Encryptor.Current.Hash.Compute16(includedEtags.ToArray());
            Etag computedEtag = Etag.Parse(computeHash);
            return(new MoreLikeThisQueryResult { Etag = computedEtag, Result = result, });
        }
        finally
        {
            // Close the analyzer wrapper (if it was created) and run the registered dispose actions.
            if (perFieldAnalyzerWrapper != null)
            {
                perFieldAnalyzerWrapper.Close();
            }
            foreach (var action in toDispose)
            {
                action();
            }
        }
    }
}
/// <summary>
/// Creates the lazy operation from the load operation and the query it is executed for.
/// </summary>
/// <param name="loadOperation">Load operation stored for later use by this lazy operation.</param>
/// <param name="query">The more-like-this query parameters.</param>
public LazyMoreLikeThisOperation(LoadOperation loadOperation, MoreLikeThisQuery query)
{
    this.query = query;
    _loadOperation = loadOperation;
}
/// <summary>
/// Runs the generic <c>QueryUtils.Check</c> checks against a <see cref="MoreLikeThisQuery"/>.
/// </summary>
public void TestMoreLikeThisQuery()
{
    var fieldNames = new[] { "text" };
    Query mltQuery = new MoreLikeThisQuery("this is a test", fieldNames, new MockAnalyzer(Random), "text");

    QueryUtils.Check(Random, mltQuery, searcher);
}
/// <summary>
/// Synchronous wrapper: runs the asynchronous more-like-this call via
/// <c>AsyncHelpers.RunSync</c> and returns its result.
/// </summary>
/// <param name="query">The more-like-this query parameters.</param>
/// <returns>The query result produced by the asynchronous client.</returns>
public QueryResult MoreLikeThis(MoreLikeThisQuery query)
{
    var queryResult = AsyncHelpers.RunSync(() => asyncServerClient.MoreLikeThisAsync(query));
    return queryResult;
}
/// <summary>
/// Creates the lazy operation from the multi-load operation and the query it is executed for.
/// </summary>
/// <param name="multiLoadOperation">Multi-load operation stored for later use by this lazy operation.</param>
/// <param name="query">The more-like-this query parameters.</param>
public LazyMoreLikeThisOperation(MultiLoadOperation multiLoadOperation, MoreLikeThisQuery query)
{
    this.query = query;
    this.multiLoadOperation = multiLoadOperation;
}
/// <summary>
/// Builds a <see cref="MoreLikeThisQuery"/> from an XML element.
/// <para/>
/// Attributes read: "fieldNames" (comma-delimited), "stopWords", "maxQueryTerms",
/// "minTermFrequency", "percentTermsToMatch", "minDocFreq" and "boost". The element's
/// text is the "like" text.
/// <para/>
/// (Java counterpart: org.apache.lucene.xmlparser.QueryObjectBuilder#process(org.w3c.dom.Element))
/// </summary>
/// <param name="e">The element describing the query.</param>
/// <returns>The configured query.</returns>
/// <exception cref="ParserException">An I/O error occurred while analyzing the stop words.</exception>
public virtual Query GetQuery(XmlElement e)
{
    // Field names: the attribute value when it is non-blank, otherwise the defaults.
    string[] fields = defaultFieldNames;
    string fieldNamesAttribute = e.GetAttribute("fieldNames");
    if (!string.IsNullOrWhiteSpace(fieldNamesAttribute))
    {
        fields = fieldNamesAttribute.Trim().Split(',').TrimEnd();

        // Trim each individual field name.
        for (int index = 0; index < fields.Length; index++)
        {
            fields[index] = fields[index].Trim();
        }
    }

    // Parse any "stopWords" attribute.
    // TODO MoreLikeThis needs to ideally have per-field stopWords lists - until then
    // all analyzers/fields are used to generate a multi-field compatible stop list.
    ISet<string> stopWordsSet = null;
    string stopWords = e.GetAttribute("stopWords");
    if (stopWords != null && fields != null)
    {
        stopWordsSet = new JCG.HashSet<string>();
        foreach (string field in fields)
        {
            TokenStream ts = null;
            try
            {
                ts = analyzer.GetTokenStream(field, stopWords);
                ICharTermAttribute termAtt = ts.AddAttribute<ICharTermAttribute>();
                ts.Reset();
                while (ts.IncrementToken())
                {
                    stopWordsSet.Add(termAtt.ToString());
                }
                ts.End();
            }
            catch (IOException ioe)
            {
                throw new ParserException("IoException parsing stop words list in " + GetType().Name + ":" + ioe.Message);
            }
            finally
            {
                IOUtils.DisposeWhileHandlingException(ts);
            }
        }
    }

    MoreLikeThisQuery mlt = new MoreLikeThisQuery(DOMUtils.GetText(e), fields, analyzer, fields[0])
    {
        MaxQueryTerms = DOMUtils.GetAttribute(e, "maxQueryTerms", DEFAULT_MAX_QUERY_TERMS),
        MinTermFrequency = DOMUtils.GetAttribute(e, "minTermFrequency", DEFAULT_MIN_TERM_FREQUENCY),
        PercentTermsToMatch = DOMUtils.GetAttribute(e, "percentTermsToMatch", DEFAULT_PERCENT_TERMS_TO_MATCH) / 100,
        StopWords = stopWordsSet
    };

    // -1 marks "attribute not given"; only a non-negative value is applied.
    int minDocFreq = DOMUtils.GetAttribute(e, "minDocFreq", -1);
    if (minDocFreq >= 0)
    {
        mlt.MinDocFreq = minDocFreq;
    }

    mlt.Boost = DOMUtils.GetAttribute(e, "boost", 1.0f);

    return mlt;
}