/// <summary>
/// Turns the hits of a "more like this" search into JSON documents, leaving out the base document.
/// </summary>
/// <param name="parameters">Query parameters; a non-empty DocumentId selects the stored-document path.</param>
/// <param name="searcher">Searcher used to read the Lucene document behind each hit.</param>
/// <param name="index">Index name, used to look up the index etag for projected results.</param>
/// <param name="hits">Hits produced by the similarity query.</param>
/// <param name="baseDocId">Lucene doc id of the document the search started from; hits with this id are dropped.</param>
/// <returns>The resulting documents, materialized as an array.</returns>
private IEnumerable<JsonDocument> GetJsonDocuments(MoreLikeThisQueryParameters parameters, IndexSearcher searcher, string index, IEnumerable<ScoreDoc> hits, int baseDocId)
{
    if (string.IsNullOrEmpty(parameters.DocumentId))
    {
        // No document id was given: rebuild each result from the fields stored in the index,
        // using the field names found on the base document.
        var baseDocument = searcher.Doc(baseDocId);
        var fieldNames = baseDocument.GetFields().Cast<AbstractField>().Select(field => field.Name()).Distinct().ToArray();
        var indexEtag = Database.GetIndexEtag(index, null);

        var projected =
            from hit in hits
            where hit.doc != baseDocId
            select new JsonDocument
            {
                DataAsJson = Index.CreateDocumentFromFields(searcher.Doc(hit.doc), fieldNames),
                Etag = indexEtag
            };
        return projected.ToArray();
    }

    // A document id was given: read the document id stored with each hit and load the real documents.
    var storedIds =
        (from hit in hits
         where hit.doc != baseDocId
         select searcher.Doc(hit.doc).Get(Constants.DocumentIdFieldName) into storedId
         where storedId != null
         select storedId).Distinct();

    var loaded =
        from storedId in storedIds
        select Database.Get(storedId, null) into document
        where document != null
        select document;
    return loaded.ToArray();
}
/// <summary>
/// Turns the hits of a "more like this" search into JSON documents, leaving out the base document.
/// </summary>
/// <param name="parameters">Query; a non-empty DocumentId selects the stored-document path.</param>
/// <param name="searcher">Searcher used to read the Lucene document behind each hit.</param>
/// <param name="index">Index instance; its IsMapReduce flag picks the key field passed to FieldsToFetch.</param>
/// <param name="indexName">Index name, used to look up the index etag for projected results.</param>
/// <param name="hits">Hits produced by the similarity query.</param>
/// <param name="baseDocId">Lucene doc id of the document the search started from; hits with this id are dropped.</param>
/// <returns>The resulting documents.</returns>
private JsonDocument[] GetJsonDocuments(MoreLikeThisQuery parameters, IndexSearcher searcher, Index index, string indexName, IEnumerable<ScoreDoc> hits, int baseDocId)
{
    if (string.IsNullOrEmpty(parameters.DocumentId))
    {
        // No document id was given: project each hit from the fields stored in the index,
        // using the field names found on the base document.
        var baseDocument = searcher.Doc(baseDocId);
        var fieldNames = baseDocument.GetFields().Cast<AbstractField>().Select(field => field.Name).Distinct().ToArray();
        var indexEtag = database.Indexes.GetIndexEtag(indexName, null);

        var projected =
            from hit in hits
            where hit.Doc != baseDocId
            select new JsonDocument
            {
                DataAsJson = Index.CreateDocumentFromFields(
                    searcher.Doc(hit.Doc),
                    new FieldsToFetch(fieldNames, false, index.IsMapReduce ? Constants.ReduceKeyFieldName : Constants.DocumentIdFieldName)),
                Etag = indexEtag
            };
        return projected.ToArray();
    }

    // A document id was given: read the document id stored with each hit and load the real documents.
    var storedIds =
        (from hit in hits
         where hit.Doc != baseDocId
         select searcher.Doc(hit.Doc).Get(Constants.DocumentIdFieldName) into storedId
         where storedId != null
         select storedId).Distinct();

    var loaded =
        from storedId in storedIds
        select database.Documents.Get(storedId, null) into document
        where document != null
        select document;
    return loaded.ToArray();
}
/// <summary>
/// Creates the "Edit Index" view model for the given index.
/// </summary>
/// <param name="index">Index stored in the private <c>_index</c> field.</param>
/// <param name="database">Assigned to the Database property.</param>
/// <param name="parent">Assigned to the ParentRavenScreen property.</param>
public IndexViewModel(Index index, IDatabase database, IRavenScreen parent)
{
    _index = index;
    Database = database;
    ParentRavenScreen = parent;
    DisplayName = "Edit Index";

    // NOTE(review): presumably satisfies composition imports declared on this instance — confirm.
    CompositionInitializer.SatisfyImports(this);
}
/// <summary>
/// Projects every hit into a JSON document built from the fields stored in the index.
/// </summary>
/// <param name="searcher">Searcher used to read the Lucene document behind each hit.</param>
/// <param name="index">Index instance; its IsMapReduce flag picks the key field passed to FieldsToFetch.</param>
/// <param name="indexName">Index name, used to look up the index etag stamped on every result.</param>
/// <param name="scoreDocs">Hits to project.</param>
/// <returns>One document per hit, or an empty array when there are no hits.</returns>
private JsonDocument[] GetJsonDocuments(IndexSearcher searcher, Index index, string indexName, ScoreDoc[] scoreDocs)
{
    if (scoreDocs.Any() == false)
        return new JsonDocument[0];

    // Since we don't have a document we get the fields from the first hit
    var firstHitDocument = searcher.Doc(scoreDocs.First().Doc);
    var fieldNames = firstHitDocument.GetFields().Cast<AbstractField>().Select(field => field.Name).Distinct().ToArray();
    var indexEtag = database.Indexes.GetIndexEtag(indexName, null);

    var projected =
        from hit in scoreDocs
        select new JsonDocument
        {
            DataAsJson = Index.CreateDocumentFromFields(
                searcher.Doc(hit.Doc),
                new FieldsToFetch(fieldNames, false, index.IsMapReduce ? Constants.ReduceKeyFieldName : Constants.DocumentIdFieldName)),
            Etag = indexEtag
        };
    return projected.ToArray();
}
/// <summary>
/// Converts the documents to JSON, running them through the query's results transformer when one is named.
/// </summary>
/// <param name="query">Query whose ResultsTransformer (if any) is looked up in the index definition storage.</param>
/// <param name="documents">Documents to convert.</param>
/// <param name="token">Cancellation token handed to the robust enumerator.</param>
/// <returns>A lazily evaluated sequence of results.</returns>
/// <exception cref="InvalidOperationException">
/// A transformer is named but cannot be found or has no transform definition.
/// </exception>
private IEnumerable<RavenJObject> ProcessResults(MoreLikeThisQuery query, IEnumerable<JsonDocument> documents, CancellationToken token)
{
    // No transformer requested: plain JSON conversion.
    if (string.IsNullOrEmpty(query.ResultsTransformer))
        return documents.Select(document => document.ToJson());

    var transformGenerator = database.IndexDefinitionStorage.GetTransformer(query.ResultsTransformer);
    if (transformGenerator == null || transformGenerator.TransformResultsDefinition == null)
        throw new InvalidOperationException("The transformer " + query.ResultsTransformer + " was not found");

    IndexingFunc transformFunc = transformGenerator.TransformResultsDefinition;

    // NOTE(review): errors are collected here but nothing in this method reads them back.
    var transformerErrors = new List<string>();
    var robustEnumerator = new RobustEnumerator(token, 100)
    {
        OnError = (exception, o) => transformerErrors.Add(string.Format("Doc '{0}', Error: {1}", Index.TryGetDocKey(o), exception.Message))
    };

    var dynamicDocuments = documents.Select(document => new DynamicJsonObject(document.ToJson()));
    return robustEnumerator
        .RobustEnumeration(dynamicDocuments.GetEnumerator(), transformFunc)
        .Select(JsonExtensions.ToJObject);
}
/// <summary>
/// Creates a suggestion extension backed by a spell checker over an in-memory or on-disk directory.
/// </summary>
/// <param name="indexInstance">Index this extension belongs to.</param>
/// <param name="workContext">Work context kept for later use.</param>
/// <param name="key">File-system path of the spell-checker directory; only used when not running in memory.</param>
/// <param name="distanceType">String distance implementation given to the spell checker.</param>
/// <param name="isRunInMemory">When true a RAMDirectory is used instead of opening <paramref name="key"/>.</param>
/// <param name="field">Field the suggestions are for.</param>
/// <param name="accuracy">Accuracy given to the spell checker.</param>
public SuggestionQueryIndexExtension(Index indexInstance, WorkContext workContext, string key, StringDistance distanceType, bool isRunInMemory, string field, float accuracy)
{
    _indexInstance = indexInstance;
    this.workContext = workContext;
    this.field = field;

    if (isRunInMemory == false)
    {
        directory = FSDirectory.Open(new DirectoryInfo(key));
    }
    else
    {
        directory = new RAMDirectory();
    }

    spellChecker = new SpellChecker.Net.Search.Spell.SpellChecker(directory, null);
    spellChecker.SetAccuracy(accuracy);
    spellChecker.setStringDistance(distanceType);

    // Human-readable description of this extension's work.
    _operationText = "Suggestions for " + field + " " + distanceType + " (" + accuracy + ")";
}
/// <summary>
/// Captures everything needed to run a single query against the parent index.
/// </summary>
/// <param name="parent">Index the query runs against.</param>
/// <param name="indexQuery">The query itself.</param>
/// <param name="shouldIncludeInResults">Predicate kept for filtering results.</param>
/// <param name="fieldsToFetch">Requested fields; a distinct query also allocates the already-returned set.</param>
/// <param name="indexQueryTriggers">Query triggers kept for later use.</param>
public IndexQueryOperation(Index parent, IndexQuery indexQuery, Func<IndexQueryResult, bool> shouldIncludeInResults, FieldsToFetch fieldsToFetch, OrderedPartCollection<AbstractIndexQueryTrigger> indexQueryTriggers)
{
    this.indexQueryTriggers = indexQueryTriggers;
    this.fieldsToFetch = fieldsToFetch;
    this.shouldIncludeInResults = shouldIncludeInResults;
    this.indexQuery = indexQuery;
    this.parent = parent;

    // Only distinct queries need to remember what was already returned.
    if (fieldsToFetch.IsDistinctQuery == false)
        return;

    alreadyReturned = new HashSet<RavenJObject>(new RavenJTokenEqualityComparer());
}
/// <summary>
/// Runs a "more like this" search against the named index and writes the outcome to the HTTP response.
/// </summary>
/// <remarks>
/// Responds with 404 when the base document or the stop-words document cannot be found, and with
/// 304 when the computed etag matches the request; otherwise writes the results with an ETag header.
/// </remarks>
/// <param name="context">HTTP context the response is written to.</param>
/// <param name="indexName">Name of the index to search.</param>
/// <param name="index">Index instance used to create the analyzer.</param>
/// <param name="parameters">Search parameters (document id, map-group fields, stop words, fields).</param>
private void PerformSearch(IHttpContext context, string indexName, Index index, MoreLikeThisQueryParameters parameters)
{
    IndexSearcher searcher;
    using (Database.IndexStorage.GetCurrentIndexSearcher(indexName, out searcher))
    {
        // Build the query that locates the base document inside the index.
        var documentQuery = new BooleanQuery();

        if (!string.IsNullOrEmpty(parameters.DocumentId))
        {
            documentQuery.Add(new TermQuery(new Term(Constants.DocumentIdFieldName, parameters.DocumentId.ToLowerInvariant())), BooleanClause.Occur.MUST);
        }

        foreach (string key in parameters.MapGroupFields.Keys)
        {
            documentQuery.Add(new TermQuery(new Term(key, parameters.MapGroupFields[key])), BooleanClause.Occur.MUST);
        }

        var td = searcher.Search(documentQuery, 1);

        // get the current Lucene docid for the given RavenDB doc ID
        if (td.ScoreDocs.Length == 0)
        {
            context.SetStatusToNotFound();
            context.WriteJson(new { Error = "Document " + parameters.DocumentId + " could not be found" });
            return;
        }

        var ir = searcher.GetIndexReader();
        var mlt = new RavenMoreLikeThis(ir);

        AssignParameters(mlt, parameters);

        if (!string.IsNullOrWhiteSpace(parameters.StopWordsDocumentId))
        {
            var stopWordsDoc = Database.Get(parameters.StopWordsDocumentId, null);
            if (stopWordsDoc == null)
            {
                context.SetStatusToNotFound();
                context.WriteJson(
                    new
                    {
                        Error = "Stop words document " + parameters.StopWordsDocumentId + " could not be found"
                    });
                return;
            }

            var stopWords = stopWordsDoc.DataAsJson.JsonDeserialization<StopWordsSetup>().StopWords;

            // Fill the table through the indexer so that stop words differing only by case
            // (e.g. "The" and "the") collapse into one entry instead of throwing on a duplicate key.
            var stopWordsTable = new Hashtable();
            foreach (var stopWord in stopWords)
            {
                stopWordsTable[stopWord.ToLower()] = stopWord;
            }
            mlt.SetStopWords(stopWordsTable);
        }

        var fieldNames = parameters.Fields ?? GetFieldNames(ir);
        mlt.SetFieldNames(fieldNames);

        var toDispose = new List<Action>();
        PerFieldAnalyzerWrapper perFieldAnalyzerWrapper = null;
        try
        {
            perFieldAnalyzerWrapper = index.CreateAnalyzer(new LowerCaseKeywordAnalyzer(), toDispose, true);
            mlt.SetAnalyzer(perFieldAnalyzerWrapper);

            var mltQuery = mlt.Like(td.ScoreDocs[0].doc);
            var tsdc = TopScoreDocCollector.create(context.GetPageSize(Database.Configuration.MaxPageSize), true);
            searcher.Search(mltQuery, tsdc);
            var hits = tsdc.TopDocs().ScoreDocs;
            var jsonDocuments = GetJsonDocuments(parameters, searcher, indexName, hits, td.ScoreDocs[0].doc);

            var result = new MultiLoadResult();

            // The response etag is an MD5 over the etags of every returned/included document plus the index etag.
            var includedEtags = new List<byte>(jsonDocuments.SelectMany(x => x.Etag.Value.ToByteArray()));
            includedEtags.AddRange(Database.GetIndexEtag(indexName, null).ToByteArray());
            var loadedIds = new HashSet<string>(jsonDocuments.Select(x => x.Key));
            var addIncludesCommand = new AddIncludesCommand(Database, GetRequestTransaction(context), (etag, includedDoc) =>
            {
                includedEtags.AddRange(etag.ToByteArray());
                result.Includes.Add(includedDoc);
            }, context.Request.QueryString.GetValues("include") ?? new string[0], loadedIds);

            foreach (var jsonDocumet in jsonDocuments)
            {
                result.Results.Add(jsonDocumet.ToJson());
                addIncludesCommand.Execute(jsonDocumet.DataAsJson);
            }

            Guid computedEtag;
            using (var md5 = MD5.Create())
            {
                var computeHash = md5.ComputeHash(includedEtags.ToArray());
                computedEtag = new Guid(computeHash);
            }

            if (context.MatchEtag(computedEtag))
            {
                context.SetStatusToNotModified();
                return;
            }

            context.Response.AddHeader("ETag", computedEtag.ToString());
            context.WriteJson(result);
        }
        finally
        {
            if (perFieldAnalyzerWrapper != null)
            {
                perFieldAnalyzerWrapper.Close();
            }
            foreach (var action in toDispose)
            {
                action();
            }
        }
    }
}
/// <summary>
/// Re-attaches a suggestion extension for every sub-directory found under the index's
/// "Raven-Suggestions" folder.
/// </summary>
/// <param name="indexName">Name of the index, used as the folder name.</param>
/// <param name="indexImplementation">Index that provides the searcher and receives the extensions.</param>
private void LoadExistingSuggesionsExtentions(string indexName, Index indexImplementation)
{
    var suggestionsRoot = Path.Combine(configuration.IndexStoragePath, "Raven-Suggestions", indexName);
    if (Directory.Exists(suggestionsRoot) == false)
        return;

    foreach (var extensionDirectory in Directory.GetDirectories(suggestionsRoot))
    {
        IndexSearcher searcher;
        using (indexImplementation.GetSearcher(out searcher))
        {
            var key = Path.GetFileName(extensionDirectory);
            var decodedKey = MonoHttpUtility.UrlDecode(key);

            // The decoded key is read from the right: <field>-<distance>-<accuracy>.
            var accuracyDash = decodedKey.LastIndexOf('-');
            var accuracy = float.Parse(decodedKey.Substring(accuracyDash + 1));

            var distanceDash = decodedKey.LastIndexOf('-', accuracyDash - 1);
            var distanceText = decodedKey.Substring(distanceDash + 1, accuracyDash - distanceDash - 1);
            StringDistanceTypes distanceType;
            Enum.TryParse(distanceText, true, out distanceType);

            var field = decodedKey.Substring(0, distanceDash);

            var extensionPath = Path.Combine(configuration.IndexStoragePath, "Raven-Suggestions", indexName, key);
            var extension = new SuggestionQueryIndexExtension(
                extensionPath,
                searcher.IndexReader,
                SuggestionQueryRunner.GetStringDistance(distanceType),
                field,
                accuracy);

            indexImplementation.SetExtension(key, extension);
        }
    }
}
/// <summary>
/// This implementation never skips an index.
/// </summary>
/// <param name="index">Index being considered; not inspected.</param>
/// <returns>Always false.</returns>
protected override bool ShouldSkipIndex(Index index)
{
    return false;
}
/// <summary>
/// Starts a long-running background task that applies the precomputed batch to a new index.
/// </summary>
/// <remarks>
/// The index's IsMapIndexingInProgress flag is cleared when nothing is started, when starting the
/// task throws, and when the task finishes (whether or not it faulted).
/// </remarks>
/// <param name="index">The new index.</param>
/// <param name="definition">Definition whose IndexId is used to look up the view generator.</param>
private void TryApplyPrecomputedBatchForNewIndex(Index index, IndexDefinition definition)
{
    var generator = IndexDefinitionStorage.GetViewGenerator(definition.IndexId);
    var hasEntityNames = generator.ForEntityNames.Count != 0;
    if (hasEntityNames == false)
    {
        // we don't optimize if we don't have what to optimize _on, we know this is going to return all docs.
        // no need to try to optimize that, then
        index.IsMapIndexingInProgress = false;
        return;
    }

    try
    {
        var precomputeTask = Task.Factory.StartNew(
            () => ApplyPrecomputedBatchForNewIndex(index, generator),
            TaskCreationOptions.LongRunning);

        precomputeTask.ContinueWith(finished =>
        {
            if (finished.IsFaulted)
            {
                Log.Warn("Could not apply precomputed batch for index " + index, finished.Exception);
            }

            index.IsMapIndexingInProgress = false;
            WorkContext.ShouldNotifyAboutWork(() => "Precomputed indexing batch for " + index.PublicName + " is completed");
            WorkContext.NotifyAboutWork();
        });
    }
    catch (Exception)
    {
        // The task could not be started, so the flag would otherwise never be cleared.
        index.IsMapIndexingInProgress = false;
        throw;
    }
}
/// <summary>
/// Prepares (but does not start) the task that applies a precomputed batch to a new index.
/// </summary>
/// <remarks>
/// Returns null, after clearing the index's IsMapIndexingInProgress flag, when the configured batch
/// size is not positive, when a non-test index has no entity names to optimize on, or when another
/// precomputed batch is already running for a non-test index.
/// </remarks>
/// <param name="index">The new index.</param>
/// <param name="definition">Definition whose IndexId is used to look up the view generator.</param>
/// <returns>
/// A delegate that starts the task, registers it with Database.Tasks and returns the registered
/// task id; or null when no task was created.
/// </returns>
private Func<long> TryCreateTaskForApplyingPrecomputedBatchForNewIndex(Index index, IndexDefinition definition)
{
    if (Database.Configuration.MaxPrecomputedBatchSizeForNewIndex <= 0) //precaution -> should never be lower than 0
    {
        index.IsMapIndexingInProgress = false;
        return null;
    }

    var generator = IndexDefinitionStorage.GetViewGenerator(definition.IndexId);
    if (generator.ForEntityNames.Count == 0 && index.IsTestIndex == false)
    {
        // we don't optimize if we don't have what to optimize _on_, we know this is going to return all docs.
        // no need to try to optimize that, then
        index.IsMapIndexingInProgress = false;
        return null;
    }

    //only one precomputed batch can run at a time except for test indexes
    if (index.IsTestIndex == false)
    {
        lock (precomputedLock)
        {
            if (isPrecomputedBatchForNewIndexIsRunning)
            {
                index.IsMapIndexingInProgress = false;
                return null;
            }

            isPrecomputedBatchForNewIndexIsRunning = true;
        }
    }

    try
    {
        var cts = new CancellationTokenSource();
        var task = new Task(() =>
        {
            try
            {
                // Test indexes use their own item cap instead of the precomputed-batch size.
                ApplyPrecomputedBatchForNewIndex(index, generator,
                    index.IsTestIndex == false ?
                        Database.Configuration.MaxPrecomputedBatchSizeForNewIndex :
                        Database.Configuration.Indexing.MaxNumberOfItemsToProcessInTestIndexes, cts);
            }
            catch (TotalDataSizeExceededException e)
            {
                // Logged and rethrown, so the task ends up faulted for this exception type.
                Log.Warn(string.Format(
                    @"Aborting applying precomputed batch for index {0}, because total data size gatherered exceeded configured data size ({1} bytes)",
                    index, Database.Configuration.MaxPrecomputedBatchTotalDocumentSizeInBytes), e);
                throw;
            }
            catch (Exception e)
            {
                // Any other failure is logged and swallowed.
                Log.Warn("Could not apply precomputed batch for index " + index, e);
            }
            finally
            {
                // Release the "running" flag (taken only for non-test indexes) and wake up the indexing work.
                if (index.IsTestIndex == false)
                    isPrecomputedBatchForNewIndexIsRunning = false;
                index.IsMapIndexingInProgress = false;
                WorkContext.ShouldNotifyAboutWork(() => "Precomputed indexing batch for " + index.PublicName + " is completed");
                WorkContext.NotifyAboutWork();
            }
        }, TaskCreationOptions.LongRunning);

        return () =>
        {
            try
            {
                task.Start();

                long id;
                Database
                    .Tasks
                    .AddTask(
                        task,
                        new TaskBasedOperationState(task),
                        new TaskActions.PendingTaskDescription
                        {
                            StartTime = DateTime.UtcNow,
                            Payload = index.PublicName,
                            TaskType = TaskActions.PendingTaskType.NewIndexPrecomputedBatch
                        },
                        out id,
                        cts);
                return id;
            }
            catch (Exception)
            {
                // Starting or registering failed: undo the flags set above before propagating.
                index.IsMapIndexingInProgress = false;
                if (index.IsTestIndex == false)
                    isPrecomputedBatchForNewIndexIsRunning = false;
                throw;
            }
        };
    }
    catch (Exception)
    {
        // Creating the task failed: undo the flags set above before propagating.
        index.IsMapIndexingInProgress = false;
        if (index.IsTestIndex == false)
            isPrecomputedBatchForNewIndexIsRunning = false;
        throw;
    }
}
/// <summary>
/// Compares the etag recorded in the index commit data with the last stored etag for the index and
/// resets the last indexed etag when the committed one is behind.
/// </summary>
/// <param name="commitData">Commit user data; may be null or lack a "LastEtag" entry.</param>
/// <param name="indexDefinition">Definition passed on to the reset.</param>
/// <param name="index">Index whose stored etag is looked up.</param>
private void CheckMapIndexState(IDictionary<string, string> commitData, IndexDefinition indexDefinition, Index index)
{
    Etag committedEtag = null;
    string rawEtag;
    if (commitData != null && commitData.TryGetValue("LastEtag", out rawEtag))
    {
        // etag will be null if parsing will fail
        Etag.TryParse(rawEtag, out committedEtag);
    }

    // Missing values on either side are treated as Etag.Empty.
    var lastStoredEtag = GetLastEtagForIndex(index) ?? Etag.Empty;
    committedEtag = committedEtag ?? Etag.Empty;

    var commitIsUpToDate = EtagUtil.IsGreaterThanOrEqual(committedEtag, lastStoredEtag);
    if (commitIsUpToDate == false)
    {
        ResetLastIndexedEtag(indexDefinition, committedEtag, SystemTime.UtcNow);
    }
}
/// <summary>
/// Captures everything needed to run a single query against the parent index.
/// </summary>
/// <param name="parent">Index the query runs against.</param>
/// <param name="indexQuery">The query itself.</param>
/// <param name="shouldIncludeInResults">Predicate kept for filtering results.</param>
/// <param name="fieldsToFetch">Requested fields; a distinct query also allocates the already-returned set.</param>
public IndexQueryOperation(Index parent, IndexQuery indexQuery, Func<IndexQueryResult, bool> shouldIncludeInResults, FieldsToFetch fieldsToFetch)
{
    this.fieldsToFetch = fieldsToFetch;
    this.shouldIncludeInResults = shouldIncludeInResults;
    this.indexQuery = indexQuery;
    this.parent = parent;

    // Distinct queries remember what was already returned, compared by JSON token equality.
    if (fieldsToFetch.IsDistinctQuery)
    {
        var tokenComparer = new JTokenEqualityComparer();
        alreadyReturned = new HashSet<JObject>(tokenComparer);
    }
}
/// <summary>
/// Loads the documents matching the new index in a single query and hands them to the indexing
/// executer as one precomputed batch.
/// </summary>
/// <remarks>
/// For non-test indexes the optimization is abandoned when the query reports more results than
/// <paramref name="pageSize"/>. Loading aborts with <c>TotalDataSizeExceededException</c> when the
/// fetched documents exceed the configured total size, or when available memory drops below the
/// configured processing limit (checked after every 16MB fetched).
/// </remarks>
/// <param name="index">The new index.</param>
/// <param name="generator">View generator used to build the matching-documents query.</param>
/// <param name="pageSize">Page size of the query and the result-count threshold.</param>
/// <param name="cts">Cancellation source; cancelled by this method when the optimization is abandoned.</param>
private void ApplyPrecomputedBatchForNewIndex(Index index, AbstractViewGenerator generator, int pageSize, CancellationTokenSource cts)
{
    PrecomputedIndexingBatch result = null;

    var docsToIndex = new List<JsonDocument>();
    TransactionalStorage.Batch(actions =>
    {
        var query = GetQueryForAllMatchingDocumentsForIndex(generator);

        using (DocumentCacher.SkipSetDocumentsInDocumentCache())
        using (var linked = CancellationTokenSource.CreateLinkedTokenSource(cts.Token, WorkContext.CancellationToken))
        using (var op = new QueryActions.DatabaseQueryOperation(Database, Constants.DocumentsByEntityNameIndex, new IndexQuery
        {
            Query = query,
            PageSize = pageSize
        }, actions, linked)
        {
            ShouldSkipDuplicateChecking = true
        })
        {
            op.Init();

            //if we are working on a test index, apply the optimization anyway, as the index is capped by small number of results
            if (index.IsTestIndex == false && op.Header.TotalResults > pageSize)
            {
                // we don't apply this optimization if the total number of results
                // to index is more than the max numbers to index in a single batch.
                // The idea here is that we need to keep the amount
                // of memory we use to a manageable level even when introducing a new index to a BIG
                // database
                try
                {
                    cts.Cancel();
                    // we have to run just a little bit of the query to properly setup the disposal
                    op.Execute(o => { });
                }
                catch (OperationCanceledException)
                {
                }
                return;
            }

            if (Log.IsDebugEnabled)
            {
                Log.Debug("For new index {0}, using precomputed indexing batch optimization for {1} docs", index, op.Header.TotalResults);
            }

            // long, so the running total cannot wrap around before a large configured limit is reached
            long totalLoadedDocumentSize = 0;
            const int totalSizeToCheck = 16 * 1024 * 1024; //16MB
            var localLoadedDocumentSize = 0;
            op.Execute(document =>
            {
                var metadata = document.Value<RavenJObject>(Constants.Metadata);
                var key = metadata.Value<string>("@id");
                var etag = Etag.Parse(metadata.Value<string>("@etag"));
                var lastModified = DateTime.Parse(metadata.Value<string>(Constants.LastModified));
                document.Remove(Constants.Metadata);
                var serializedSizeOnDisk = metadata.Value<int>(Constants.SerializedSizeOnDisk);
                metadata.Remove(Constants.SerializedSizeOnDisk);

                var doc = new JsonDocument
                {
                    DataAsJson = document,
                    Etag = etag,
                    Key = key,
                    SerializedSizeOnDisk = serializedSizeOnDisk,
                    LastModified = lastModified,
                    SkipDeleteFromIndex = true,
                    Metadata = metadata
                };

                docsToIndex.Add(doc);
                totalLoadedDocumentSize += serializedSizeOnDisk;
                localLoadedDocumentSize += serializedSizeOnDisk;

                if (totalLoadedDocumentSize > Database.Configuration.MaxPrecomputedBatchTotalDocumentSizeInBytes)
                {
                    // Both figures are reported in megabytes, matching the "mb" labels in the message.
                    var error = string.Format(
                        @"Aborting applying precomputed batch for index id: {0}, name: {1} because we have {2}mb of documents that were fetched and the configured max data to fetch is {3}mb",
                        index.indexId, index.PublicName, totalLoadedDocumentSize / 1024 / 1024,
                        Database.Configuration.MaxPrecomputedBatchTotalDocumentSizeInBytes/1024/1024);

                    //we are aborting operation, so don't keep the references
                    docsToIndex.Clear();
                    throw new TotalDataSizeExceededException(error);
                }

                // The memory check below only runs once per 16MB of fetched documents.
                if (localLoadedDocumentSize <= totalSizeToCheck)
                    return;

                localLoadedDocumentSize = 0;

                if (Database.Configuration.MemoryLimitForProcessingInMb > MemoryStatistics.AvailableMemoryInMb)
                {
                    var error = string.Format(
                        @"Aborting applying precomputed batch for index id: {0}, name: {1} because we have {2}mb of available memory and the available memory for processing is: {3}mb",
                        index.indexId, index.PublicName,
                        MemoryStatistics.AvailableMemoryInMb, Database.Configuration.MemoryLimitForProcessingInMb);

                    //we are aborting operation, so don't keep the references
                    docsToIndex.Clear();
                    throw new TotalDataSizeExceededException(error);
                }
            });

            result = new PrecomputedIndexingBatch
            {
                LastIndexed = op.Header.IndexEtag,
                LastModified = op.Header.IndexTimestamp,
                Documents = docsToIndex,
                Index = index
            };
        }
    });

    // NOTE(review): "Count >= 0" is always true, so an empty batch is still handed to the executer;
    // kept as-is because it may be intentional — confirm.
    if (result != null && result.Documents != null && result.Documents.Count >= 0)
    {
        using (var linked = CancellationTokenSource.CreateLinkedTokenSource(cts.Token, WorkContext.CancellationToken))
        {
            Database.IndexingExecuter.IndexPrecomputedBatch(result, linked.Token);

            if (index.IsTestIndex)
                TransactionalStorage.Batch(accessor => accessor.Indexing.TouchIndexEtag(index.IndexId));
        }
    }
}
/// <summary>
/// Tells whether the given index should be skipped; each derived class supplies its own rule.
/// </summary>
/// <param name="index">Index being considered.</param>
/// <returns>True when the index should be skipped.</returns>
protected abstract bool ShouldSkipIndex(Index index);
/// <summary>
/// Flushes the index up to its last etag from stats and logs how long it took.
/// </summary>
/// <param name="value">Index to flush.</param>
/// <param name="onlyAddIndexError">
/// When true a flush failure is recorded on the index and swallowed; when false it is rethrown.
/// In both cases the write-error counter is incremented and a warning is logged.
/// </param>
private static void FlushIndex(Index value, bool onlyAddIndexError = false)
{
    var stopwatch = Stopwatch.StartNew();

    try
    {
        var lastEtag = value.GetLastEtagFromStats();
        value.Flush(lastEtag);
    }
    catch (Exception e)
    {
        value.IncrementWriteErrors(e);
        var message = string.Format("Failed to flush {0} index: {1} (id: {2})",
            GetIndexType(value.IsMapReduce), value.PublicName, value.IndexId);
        log.WarnException(message, e);

        if (onlyAddIndexError == false)
            throw;

        value.AddIndexFailedFlushError(e);
        return;
    }

    if (log.IsDebugEnabled == false)
        return;

    log.Debug("Flushed {0} index: {1} (id: {2}), took {3}ms",
        GetIndexType(value.IsMapReduce), value.PublicName, value.IndexId, stopwatch.ElapsedMilliseconds);
}
/// <summary>
/// Skips test indexes.
/// </summary>
/// <param name="index">Index being considered.</param>
/// <returns>The value of the index's IsTestIndex flag.</returns>
protected override bool ShouldSkipIndex(Index index)
{
    return index.IsTestIndex;
}
/// <summary>
/// Turns the hits of a "more like this" search into JSON documents, leaving out the base document.
/// </summary>
/// <param name="parameters">Query; a non-empty DocumentId selects the stored-document path.</param>
/// <param name="searcher">Searcher used to read the Lucene document behind each hit.</param>
/// <param name="index">Index instance; its IsMapReduce flag picks the key field passed to FieldsToFetch.</param>
/// <param name="indexName">Index name, used to look up the index etag for projected results.</param>
/// <param name="hits">Hits produced by the similarity query.</param>
/// <param name="baseDocId">Lucene doc id of the document the search started from; hits with this id are dropped.</param>
/// <returns>The resulting documents.</returns>
private JsonDocument[] GetJsonDocuments(MoreLikeThisQuery parameters, IndexSearcher searcher, Index index, string indexName, IEnumerable<ScoreDoc> hits, int baseDocId)
{
    var resolveStoredDocuments = string.IsNullOrEmpty(parameters.DocumentId) == false;
    if (resolveStoredDocuments)
    {
        // Read the document id stored with each hit, then load the distinct documents that still exist.
        var candidateIds = hits
            .Where(hit => hit.Doc != baseDocId)
            .Select(hit => searcher.Doc(hit.Doc).Get(Constants.DocumentIdFieldName));
        var uniqueIds = candidateIds.Where(id => id != null).Distinct();
        var storedDocuments = uniqueIds.Select(id => database.Documents.Get(id, null));

        return storedDocuments.Where(document => document != null).ToArray();
    }

    // No document id: project each hit from the index, using the field names of the base document.
    var baseDocument = searcher.Doc(baseDocId);
    var fieldNames = baseDocument.GetFields().Cast<AbstractField>().Select(field => field.Name).Distinct().ToArray();
    var indexEtag = database.Indexes.GetIndexEtag(indexName, null);

    var otherHits = hits.Where(hit => hit.Doc != baseDocId);
    var projected = otherHits.Select(hit => new JsonDocument
    {
        DataAsJson = Index.CreateDocumentFromFields(
            searcher.Doc(hit.Doc),
            new FieldsToFetch(fieldNames, false, index.IsMapReduce ? Constants.ReduceKeyFieldName : Constants.DocumentIdFieldName)),
        Etag = indexEtag
    });

    return projected.ToArray();
}
/// <summary>
/// Compares the etag recorded in the index commit data with the last stored etag for the index;
/// when the committed one is behind, logs the mismatch and resets the last indexed etag.
/// </summary>
/// <param name="commitData">Commit user data; may be null or lack a "LastEtag" entry.</param>
/// <param name="indexDefinition">Definition used for the log message and passed on to the reset.</param>
/// <param name="index">Index whose stored etag is looked up.</param>
private void CheckMapIndexState(IDictionary<string, string> commitData, IndexDefinition indexDefinition, Index index)
{
    Etag committedEtag = null;
    string rawEtag;
    if (commitData != null && commitData.TryGetValue("LastEtag", out rawEtag))
    {
        // etag will be null if parsing will fail
        Etag.TryParse(rawEtag, out committedEtag);
    }

    // Missing values on either side are treated as Etag.Empty.
    var lastStoredEtag = GetLastEtagForIndex(index) ?? Etag.Empty;
    committedEtag = committedEtag ?? Etag.Empty;

    var commitIsUpToDate = EtagUtil.IsGreaterThanOrEqual(committedEtag, lastStoredEtag);
    if (commitIsUpToDate == false)
    {
        var message = string.Format("Resetting index '{0} ({1})'. Last stored etag: {2}. Last commit etag: {3}.",
            indexDefinition.Name, index.indexId, lastStoredEtag, committedEtag);
        log.Info(message);

        ResetLastIndexedEtag(indexDefinition, committedEtag, SystemTime.UtcNow);
    }
}
/// <summary>
/// Re-attaches a suggestion extension for every sub-directory found under the index's
/// "Raven-Suggestions" folder.
/// </summary>
/// <remarks>
/// On any failure a warning is logged, deletion of the suggestions folder is attempted (a failure
/// to delete is ignored) and the original exception is rethrown.
/// </remarks>
/// <param name="indexName">Name of the index, used as the folder name.</param>
/// <param name="indexImplementation">Index that provides the searcher and receives the extensions.</param>
private void LoadExistingSuggestionsExtentions(string indexName, Index indexImplementation)
{
    var suggestionsRoot = Path.Combine(configuration.IndexStoragePath, "Raven-Suggestions", indexName);
    if (Directory.Exists(suggestionsRoot) == false)
        return;

    try
    {
        foreach (var extensionDirectory in Directory.GetDirectories(suggestionsRoot))
        {
            IndexSearcher searcher;
            using (indexImplementation.GetSearcher(out searcher))
            {
                var key = Path.GetFileName(extensionDirectory);
                var decodedKey = MonoHttpUtility.UrlDecode(key);

                // The decoded key is read from the right: <field>-<distance>-<accuracy>.
                var accuracyDash = decodedKey.LastIndexOf('-');
                var accuracy = float.Parse(decodedKey.Substring(accuracyDash + 1), CultureInfo.InvariantCulture);

                var distanceDash = decodedKey.LastIndexOf('-', accuracyDash - 1);
                var distanceText = decodedKey.Substring(distanceDash + 1, accuracyDash - distanceDash - 1);
                StringDistanceTypes distanceType;
                Enum.TryParse(distanceText, true, out distanceType);

                var field = decodedKey.Substring(0, distanceDash);

                var extension = new SuggestionQueryIndexExtension(
                    indexImplementation,
                    documentDatabase.WorkContext,
                    Path.Combine(configuration.IndexStoragePath, "Raven-Suggestions", indexName, key),
                    SuggestionQueryRunner.GetStringDistance(distanceType),
                    searcher.IndexReader.Directory() is RAMDirectory,
                    field,
                    accuracy);

                indexImplementation.SetExtension(key, extension);
            }
        }
    }
    catch (Exception e)
    {
        log.WarnException("Could not open suggestions for index " + indexName + ", resetting the index", e);

        try
        {
            IOExtensions.DeleteDirectory(suggestionsRoot);
        }
        catch (Exception)
        {
            // ignore the failure
        }

        throw;
    }
}
/// <summary>
/// Runs the query against the named index inside a storage batch and returns the results,
/// optionally passed through the index's transform-results definition.
/// </summary>
/// <param name="index">Index name; normalized through FixupIndexName before use.</param>
/// <param name="query">The query to run.</param>
/// <returns>Results together with staleness, totals and the index timestamp/etag.</returns>
/// <exception cref="InvalidOperationException">
/// The index has no view generator, or the transform-results function reported errors.
/// </exception>
/// <exception cref="IndexDisabledException">The index failure rate marks it as invalid.</exception>
public QueryResult Query(string index, IndexQuery query)
{
    index = IndexDefinitionStorage.FixupIndexName(index);

    var list = new List<JObject>();
    var stale = false;
    Tuple<DateTime, Guid> indexTimestamp = null;

    TransactionalStorage.Batch(actions =>
    {
        var viewGenerator = IndexDefinitionStorage.GetViewGenerator(index);
        if (viewGenerator == null)
            throw new InvalidOperationException("Could not find index named: " + index);

        var entityName = viewGenerator.ForEntityName;
        stale = actions.Staleness.IsIndexStale(index, query.Cutoff, entityName);
        indexTimestamp = actions.Staleness.IndexLastUpdatedAt(index);

        var failureInformation = actions.Indexing.GetFailureRate(index);
        if (failureInformation.IsInvalidIndex)
            throw new IndexDisabledException(failureInformation);

        var docRetriever = new DocumentRetriever(actions, ReadTriggers);
        var indexDefinition = GetIndexDefinition(index);

        // Indexes with a reduce definition are keyed by the reduce key, the others by document id.
        var fieldsToFetch = new FieldsToFetch(query.FieldsToFetch, query.AggregationOperation,
            viewGenerator.ReduceDefinition != null
                ? Abstractions.Data.Constants.ReduceKeyFieldName
                : Abstractions.Data.Constants.DocumentIdFieldName);

        var collection = IndexStorage
            .Query(index, query, result => docRetriever.ShouldIncludeResultInQuery(result, indexDefinition, fieldsToFetch), fieldsToFetch)
            .Select(queryResult => docRetriever.RetrieveDocumentForQuery(queryResult, indexDefinition, fieldsToFetch))
            .Where(doc => doc != null);

        var transformerErrors = new List<string>();
        var shouldTransform = query.SkipTransformResults == false && viewGenerator.TransformResultsDefinition != null;

        IEnumerable<JObject> results;
        if (shouldTransform == false)
        {
            results = collection.Select(doc => doc.ToJson());
        }
        else
        {
            var robustEnumerator = new RobustEnumerator
            {
                OnError = (exception, o) => transformerErrors.Add(string.Format("Doc '{0}', Error: {1}", Index.TryGetDocKey(o), exception.Message))
            };
            var dynamicJsonObjects = collection.Select(doc => new DynamicJsonObject(doc.ToJson())).ToArray();
            results = robustEnumerator
                .RobustEnumeration(dynamicJsonObjects, source => viewGenerator.TransformResultsDefinition(docRetriever, source))
                .Select(JsonExtensions.ToJObject);
        }

        // Enumerating here is what runs the transform, so errors are only known afterwards.
        list.AddRange(results);

        if (transformerErrors.Count > 0)
        {
            throw new InvalidOperationException("The transform results function failed.\r\n" + string.Join("\r\n", transformerErrors));
        }
    });

    return new QueryResult
    {
        IndexName = index,
        Results = list,
        IsStale = stale,
        SkippedResults = query.SkippedResults.Value,
        TotalResults = query.TotalSize.Value,
        IndexTimestamp = indexTimestamp.Item1,
        IndexEtag = indexTimestamp.Item2
    };
}
/// <summary>
/// Runs a "more like this" search against the named index and writes the outcome to the HTTP response.
/// </summary>
/// <remarks>
/// Responds with 404 when the base document or the stop-words document cannot be found, and with
/// 304 when the computed etag matches the request; otherwise writes the results with an ETag header.
/// </remarks>
/// <param name="context">HTTP context the response is written to.</param>
/// <param name="indexName">Name of the index to search.</param>
/// <param name="index">Index instance used to create the analyzer.</param>
/// <param name="parameters">Search parameters (document id, map-group fields, stop words, fields).</param>
private void PerformSearch(IHttpContext context, string indexName, Index index, MoreLikeThisQueryParameters parameters)
{
    IndexSearcher searcher;
    using (Database.IndexStorage.GetCurrentIndexSearcher(indexName, out searcher))
    {
        // Build the query that locates the base document inside the index.
        var documentQuery = new BooleanQuery();

        if (!string.IsNullOrEmpty(parameters.DocumentId))
        {
            documentQuery.Add(new TermQuery(new Term(Constants.DocumentIdFieldName, parameters.DocumentId)), Lucene.Net.Search.BooleanClause.Occur.MUST);
        }

        foreach (string key in parameters.MapGroupFields.Keys)
        {
            documentQuery.Add(new TermQuery(new Term(key, parameters.MapGroupFields[key])), Lucene.Net.Search.BooleanClause.Occur.MUST);
        }

        var td = searcher.Search(documentQuery, 1);

        // get the current Lucene docid for the given RavenDB doc ID
        if (td.ScoreDocs.Length == 0)
        {
            context.SetStatusToNotFound();
            context.WriteJson(new {Error = "Document " + parameters.DocumentId + " could not be found"});
            return;
        }

        var ir = searcher.GetIndexReader();
        var mlt = new RavenMoreLikeThis(ir);

        AssignParameters(mlt, parameters);

        if (!string.IsNullOrWhiteSpace(parameters.StopWordsDocumentId))
        {
            var stopWordsDoc = Database.Get(parameters.StopWordsDocumentId, null);
            if (stopWordsDoc == null)
            {
                context.SetStatusToNotFound();
                context.WriteJson(
                    new
                    {
                        Error = "Stop words document " + parameters.StopWordsDocumentId + " could not be found"
                    });
                return;
            }

            var stopWords = stopWordsDoc.DataAsJson.JsonDeserialization<StopWordsSetup>().StopWords;

            // Fill the table through the indexer so that stop words differing only by case
            // (e.g. "The" and "the") collapse into one entry instead of throwing on a duplicate key.
            var stopWordsTable = new Hashtable();
            foreach (var stopWord in stopWords)
            {
                stopWordsTable[stopWord.ToLower()] = stopWord;
            }
            mlt.SetStopWords(stopWordsTable);
        }

        var fieldNames = parameters.Fields ?? GetFieldNames(ir);
        mlt.SetFieldNames(fieldNames);

        var toDispose = new List<Action>();
        PerFieldAnalyzerWrapper perFieldAnalyzerWrapper = null;
        try
        {
            perFieldAnalyzerWrapper = index.CreateAnalyzer(new LowerCaseKeywordAnalyzer(), toDispose, true);
            mlt.SetAnalyzer(perFieldAnalyzerWrapper);

            var mltQuery = mlt.Like(td.ScoreDocs[0].doc);
            var tsdc = TopScoreDocCollector.create(context.GetPageSize(Database.Configuration.MaxPageSize), true);
            searcher.Search(mltQuery, tsdc);
            var hits = tsdc.TopDocs().ScoreDocs;
            var jsonDocuments = GetJsonDocuments(parameters, searcher, indexName, hits, td.ScoreDocs[0].doc);

            var result = new MultiLoadResult();

            // The response etag is an MD5 over the etags of every returned/included document plus the index etag.
            var includedEtags = new List<byte>(jsonDocuments.SelectMany(x => x.Etag.Value.ToByteArray()));
            includedEtags.AddRange(Database.GetIndexEtag(indexName, null).ToByteArray());
            var loadedIds = new HashSet<string>(jsonDocuments.Select(x => x.Key));
            var addIncludesCommand = new AddIncludesCommand(Database, GetRequestTransaction(context), (etag, includedDoc) =>
            {
                includedEtags.AddRange(etag.ToByteArray());
                result.Includes.Add(includedDoc);
            }, context.Request.QueryString.GetValues("include") ?? new string[0], loadedIds);

            foreach (var jsonDocumet in jsonDocuments)
            {
                result.Results.Add(jsonDocumet.ToJson());
                addIncludesCommand.Execute(jsonDocumet.DataAsJson);
            }

            Guid computedEtag;
            using (var md5 = MD5.Create())
            {
                var computeHash = md5.ComputeHash(includedEtags.ToArray());
                computedEtag = new Guid(computeHash);
            }

            if (context.MatchEtag(computedEtag))
            {
                context.SetStatusToNotModified();
                return;
            }

            context.Response.AddHeader("ETag", computedEtag.ToString());
            context.WriteJson(result);
        }
        finally
        {
            if (perFieldAnalyzerWrapper != null)
                perFieldAnalyzerWrapper.Close();
            foreach (var action in toDispose)
            {
                action();
            }
        }
    }
}
/// <summary>
/// This implementation never skips an index.
/// </summary>
/// <param name="index">Index being considered; not inspected.</param>
/// <returns>Always false.</returns>
protected override bool ShouldSkipIndex(Index index)
{
    return false;
}
/// <summary>
/// Skips test indexes and indexes whose map indexing is already in progress.
/// </summary>
/// <param name="index">Index being considered.</param>
/// <returns>True when either flag is set on the index.</returns>
protected override bool ShouldSkipIndex(Index index)
{
    if (index.IsTestIndex)
        return true;

    // precomputed? slow? it is already running, nothing to do with it for now;
    return index.IsMapIndexingInProgress;
}
/// <summary>
/// Validates the index against the commit user data of its directory, dispatching to the
/// map-reduce or the map check. Does nothing when ResetIndexOnUncleanShutdown is off.
/// </summary>
/// <param name="directory">Lucene directory whose commit user data is read.</param>
/// <param name="indexDefinition">Definition passed to the map-index check.</param>
/// <param name="index">Index being checked; its IsMapReduce flag picks the check.</param>
/// <param name="resetTried">Passed to the map-reduce check.</param>
private void CheckIndexState(Lucene.Net.Store.Directory directory, IndexDefinition indexDefinition, Index index, bool resetTried)
{
    if (configuration.ResetIndexOnUncleanShutdown == false)
        return;

    // 1. If commitData is null it means that there were no commits, so just in case we are resetting to Etag.Empty
    // 2. If no 'LastEtag' in commitData then we consider it an invalid index
    // 3. If 'LastEtag' is present (and valid), then resetting to it (if it is lower than lastStoredEtag)
    var commitData = IndexReader.GetCommitUserData(directory);

    if (index.IsMapReduce)
    {
        CheckMapReduceIndexState(commitData, resetTried);
        return;
    }

    CheckMapIndexState(commitData, indexDefinition, index);
}
/// <summary>
/// Stores the collaborators and bookkeeping sets used by the recorder.
/// </summary>
/// <param name="indexSearcher">Searcher kept for later use.</param>
/// <param name="parent">Owning index.</param>
/// <param name="documentsAlreadySeenInPreviousPage">Set of document keys seen on a previous page.</param>
/// <param name="alreadyReturned">Set of results that were already returned.</param>
/// <param name="fieldsToFetch">Fields requested by the query.</param>
/// <param name="isProjectionOrMapReduce">Whether the query is a projection or targets a map-reduce index.</param>
public DuplicateDocumentRecorder(Searchable indexSearcher, Index parent, HashSet<string> documentsAlreadySeenInPreviousPage, HashSet<RavenJObject> alreadyReturned, FieldsToFetch fieldsToFetch, bool isProjectionOrMapReduce)
{
    // Collaborators
    this.indexSearcher = indexSearcher;
    this.parent = parent;
    this.fieldsToFetch = fieldsToFetch;

    // Bookkeeping
    this.documentsAlreadySeenInPreviousPage = documentsAlreadySeenInPreviousPage;
    this.alreadyReturned = alreadyReturned;
    this.isProjectionOrMapReduce = isProjectionOrMapReduce;
}
/// <summary>
/// Tells whether the index is an auto index (public name starts with "Auto/") with Idle priority.
/// </summary>
/// <param name="index">Index to inspect.</param>
/// <returns>True when both conditions hold.</returns>
private static bool IsIdleAutoIndex(Index index)
{
    // Index names are identifiers, so the prefix is compared ordinally rather than with the
    // current culture's linguistic rules.
    return index.PublicName.StartsWith("Auto/", System.StringComparison.Ordinal) &&
           index.Priority == IndexingPriority.Idle;
}
/// <summary>
/// Prepares (but does not start) the task that applies a precomputed batch to a new index.
/// </summary>
/// <remarks>
/// Returns null, after clearing the index's IsMapIndexingInProgress flag, when a non-test index has
/// no entity names to optimize on or when another precomputed batch is already running.
/// </remarks>
/// <param name="index">The new index.</param>
/// <param name="definition">Definition whose IndexId is used to look up the view generator.</param>
/// <returns>
/// A delegate that starts the task and registers it with Database.Tasks; or null when no task was created.
/// </returns>
private Action TryCreateTaskForApplyingPrecomputedBatchForNewIndex(Index index, IndexDefinition definition)
{
    var generator = IndexDefinitionStorage.GetViewGenerator(definition.IndexId);
    if (generator.ForEntityNames.Count == 0 && index.IsTestIndex == false)
    {
        // we don't optimize if we don't have what to optimize _on_, we know this is going to return all docs.
        // no need to try to optimize that, then
        index.IsMapIndexingInProgress = false;
        return null;
    }

    // Only one precomputed batch may run at a time; the flag is checked and set under the lock.
    lock (precomputedLock)
    {
        if (isPrecomputedBatchForNewIndexIsRunning)
        {
            index.IsMapIndexingInProgress = false;
            return null;
        }

        isPrecomputedBatchForNewIndexIsRunning = true;
    }

    try
    {
        var cts = new CancellationTokenSource();
        var task = new Task(() =>
        {
            try
            {
                // Test indexes use their own item cap instead of the single-batch maximum.
                ApplyPrecomputedBatchForNewIndex(index, generator,
                    index.IsTestIndex == false ? Database.Configuration.MaxNumberOfItemsToProcessInSingleBatch : Database.Configuration.Indexing.MaxNumberOfItemsToProcessInTestIndexes, cts);
            }
            catch (Exception e)
            {
                // Failures are logged and swallowed.
                Log.Warn("Could not apply precomputed batch for index " + index, e);
            }
            finally
            {
                // Release the "running" flag and wake up the indexing work.
                isPrecomputedBatchForNewIndexIsRunning = false;
                index.IsMapIndexingInProgress = false;
                WorkContext.ShouldNotifyAboutWork(() => "Precomputed indexing batch for " + index.PublicName + " is completed");
                WorkContext.NotifyAboutWork();
            }
        }, TaskCreationOptions.LongRunning);

        return () =>
        {
            try
            {
                task.Start();

                long id;
                Database
                    .Tasks
                    .AddTask(
                        task,
                        new TaskBasedOperationState(task),
                        new TaskActions.PendingTaskDescription
                        {
                            StartTime = DateTime.UtcNow,
                            Payload = index.PublicName,
                            TaskType = TaskActions.PendingTaskType.NewIndexPrecomputedBatch
                        },
                        out id,
                        cts);
            }
            catch (Exception)
            {
                // Starting or registering failed: undo the flags set above before propagating.
                index.IsMapIndexingInProgress = false;
                isPrecomputedBatchForNewIndexIsRunning = false;
                throw;
            }
        };
    }
    catch (Exception)
    {
        // Creating the task failed: undo the flags set above before propagating.
        index.IsMapIndexingInProgress = false;
        isPrecomputedBatchForNewIndexIsRunning = false;
        throw;
    }
}
/// <summary>
/// Reads the last indexed etag of a map index from its stored statistics.
/// </summary>
/// <param name="index">Index to look up.</param>
/// <returns>
/// Null for map-reduce indexes; Etag.Empty when no statistics are stored; otherwise the stored
/// LastIndexedEtag.
/// </returns>
internal Etag GetLastEtagForIndex(Index index)
{
    if (index.IsMapReduce)
        return null;

    IndexStats indexStats = null;
    documentDatabase.TransactionalStorage.Batch(accessor =>
    {
        indexStats = accessor.Indexing.GetIndexStats(index.IndexId);
    });

    if (indexStats == null)
        return Etag.Empty;

    return indexStats.LastIndexedEtag;
}
/// <summary>
/// Registers a suggestion extension on the index for every suggestion in the definition whose
/// distance is not None.
/// </summary>
/// <param name="name">Index name, used for the storage path and for registering the extension.</param>
/// <param name="definition">Definition whose Suggestions are walked.</param>
/// <param name="index">Index instance handed to each extension.</param>
private void InvokeSuggestionIndexing(string name, IndexDefinition definition, Index index)
{
    foreach (var suggestion in definition.Suggestions)
    {
        var options = suggestion.Value;
        if (options.Distance == StringDistanceTypes.None)
            continue;

        var fieldName = suggestion.Key;

        // Extension key is the URL-encoded form of <field>-<distance>-<accuracy>.
        var extensionKey = MonoHttpUtility.UrlEncode(fieldName + "-" + options.Distance + "-" + options.Accuracy);

        var extension = new SuggestionQueryIndexExtension(
            index,
            WorkContext,
            Path.Combine(Database.Configuration.IndexStoragePath, "Raven-Suggestions", name, extensionKey),
            SuggestionQueryRunner.GetStringDistance(options.Distance),
            Database.Configuration.RunInMemory,
            fieldName,
            options.Accuracy);

        Database.IndexStorage.SetIndexExtension(name, extensionKey, extension);
    }
}
/// <summary>
/// Loads the documents matching the new index's entity names in a single query and hands them to
/// the indexing executer as one precomputed batch.
/// </summary>
/// <remarks>
/// The optimization is abandoned when the query reports no results or more results than
/// MaxNumberOfItemsToProcessInSingleBatch, and when the query ends up streaming no documents.
/// </remarks>
/// <param name="index">The new index.</param>
/// <param name="generator">View generator whose ForEntityNames build the "Tag:[[...]]" query.</param>
private void ApplyPrecomputedBatchForNewIndex(Index index, AbstractViewGenerator generator)
{
    const string DocumentsByEntityNameIndex = "Raven/DocumentsByEntityName";

    PrecomputedIndexingBatch result = null;

    var docsToIndex = new List<JsonDocument>();
    TransactionalStorage.Batch(actions =>
    {
        var tags = generator.ForEntityNames.Select(entityName => "Tag:[[" + entityName + "]]").ToList();

        var query = string.Join(" OR ", tags);

        JsonDocument highestByEtag = null;

        var cts = new CancellationTokenSource();
        using (var linked = CancellationTokenSource.CreateLinkedTokenSource(cts.Token, WorkContext.CancellationToken))
        using (var op = new QueryActions.DatabaseQueryOperation(Database, DocumentsByEntityNameIndex, new IndexQuery
        {
            Query = query,
            PageSize = Database.Configuration.MaxNumberOfItemsToProcessInSingleBatch
        }, actions, linked)
        {
            ShouldSkipDuplicateChecking = true
        })
        {
            op.Init();
            if (op.Header.TotalResults == 0 ||
                (op.Header.TotalResults > Database.Configuration.MaxNumberOfItemsToProcessInSingleBatch))
            {
                // we don't apply this optimization if the total number of results
                // to index is more than the max numbers to index in a single batch.
                // The idea here is that we need to keep the amount
                // of memory we use to a manageable level even when introducing a new index to a BIG
                // database
                try
                {
                    cts.Cancel();
                    // we have to run just a little bit of the query to properly setup the disposal
                    op.Execute(o => { });
                }
                catch (OperationCanceledException)
                {
                }
                return;
            }

            Log.Debug("For new index {0}, using precomputed indexing batch optimization for {1} docs", index,
                op.Header.TotalResults);
            op.Execute(document =>
            {
                var metadata = document.Value<RavenJObject>(Constants.Metadata);
                var key = metadata.Value<string>("@id");
                var etag = Etag.Parse(metadata.Value<string>("@etag"));
                var lastModified = DateTime.Parse(metadata.Value<string>(Constants.LastModified));
                document.Remove(Constants.Metadata);

                var doc = new JsonDocument
                {
                    DataAsJson = document,
                    Etag = etag,
                    Key = key,
                    LastModified = lastModified,
                    SkipDeleteFromIndex = true,
                    Metadata = metadata
                };

                docsToIndex.Add(doc);

                // Track the document with the highest etag; it supplies the batch's LastIndexed/LastModified.
                if (highestByEtag == null || doc.Etag.CompareTo(highestByEtag.Etag) > 0)
                    highestByEtag = doc;
            });
        }

        // The header can report results while the query streams no documents; in that case there
        // is no highest etag to read and nothing to index, so leave result as null.
        if (highestByEtag == null)
            return;

        result = new PrecomputedIndexingBatch
        {
            LastIndexed = highestByEtag.Etag,
            LastModified = highestByEtag.LastModified.Value,
            Documents = docsToIndex,
            Index = index
        };
    });

    if (result != null && result.Documents != null && result.Documents.Count > 0)
        Database.IndexingExecuter.IndexPrecomputedBatch(result);
}
/// <summary>
/// Skips test indexes and indexes whose map indexing is already in progress.
/// </summary>
/// <param name="index">Index being considered.</param>
/// <returns>True when either flag is set on the index.</returns>
protected override bool ShouldSkipIndex(Index index)
{
    if (index.IsTestIndex)
    {
        return true;
    }

    // precomputed? slow? it is already running, nothing to do with it for now;
    return index.IsMapIndexingInProgress;
}