This is a thread-safe, single instance for a particular index.
Inheritance: IDisposable
        /// <summary>
        /// Builds the documents returned for a "more like this" search, leaving out the hit
        /// whose Lucene doc id equals <paramref name="baseDocId"/>.
        /// </summary>
        /// <remarks>
        /// When <c>parameters.DocumentId</c> is set, the document ids stored in the matching index
        /// entries are loaded through <c>Database.Get</c> (missing ones are dropped). Otherwise each
        /// hit is projected from the index entry's fields and stamped with the index etag.
        /// </remarks>
        /// <param name="parameters">The query parameters; only <c>DocumentId</c> is read here.</param>
        /// <param name="searcher">Searcher used to read the stored index entries.</param>
        /// <param name="index">Name of the index, used to look up the index etag.</param>
        /// <param name="hits">The hits produced by the similarity query.</param>
        /// <param name="baseDocId">Lucene doc id of the entry the search started from.</param>
        /// <returns>An array with the resulting documents.</returns>
        private IEnumerable <JsonDocument> GetJsonDocuments(MoreLikeThisQueryParameters parameters, IndexSearcher searcher, string index, IEnumerable <ScoreDoc> hits, int baseDocId)
        {
            if (string.IsNullOrEmpty(parameters.DocumentId) == false)
            {
                // Resolve every other hit to the document id stored in its index entry.
                var storedIds =
                    (from hit in hits
                     where hit.doc != baseDocId
                     select searcher.Doc(hit.doc).Get(Constants.DocumentIdFieldName)
                     into storedId
                     where storedId != null
                     select storedId).Distinct();

                var loadedDocuments =
                    from docId in storedIds
                    select Database.Get(docId, null)
                    into loaded
                    where loaded != null
                    select loaded;

                return loadedDocuments.ToArray();
            }

            // No source document id: project the results straight from the index entries,
            // using the field names of the base entry.
            var fieldNames =
                (from AbstractField field in searcher.Doc(baseDocId).GetFields()
                 select field.Name()).Distinct().ToArray();
            var indexEtag = Database.GetIndexEtag(index, null);

            var projected =
                from hit in hits
                where hit.doc != baseDocId
                select new JsonDocument
                {
                    DataAsJson = Index.CreateDocumentFromFields(searcher.Doc(hit.doc), fieldNames),
                    Etag = indexEtag
                };

            return projected.ToArray();
        }
Exemple #2
0
        /// <summary>
        /// Turns the hits of a "more like this" search into documents, excluding the hit whose
        /// Lucene doc id equals <paramref name="baseDocId"/>.
        /// </summary>
        /// <param name="parameters">The query; only <c>DocumentId</c> is read here.</param>
        /// <param name="searcher">Searcher used to read the stored index entries.</param>
        /// <param name="index">The index instance; <c>IsMapReduce</c> selects which key field is passed to <c>FieldsToFetch</c>.</param>
        /// <param name="indexName">Name of the index, used to look up the index etag.</param>
        /// <param name="hits">The hits produced by the similarity query.</param>
        /// <param name="baseDocId">Lucene doc id of the entry the search started from.</param>
        /// <returns>Loaded documents when a document id was given, otherwise projections of the index entries.</returns>
        private JsonDocument[] GetJsonDocuments(MoreLikeThisQuery parameters, IndexSearcher searcher, Index index, string indexName, IEnumerable <ScoreDoc> hits, int baseDocId)
        {
            if (string.IsNullOrEmpty(parameters.DocumentId))
            {
                // No source document id: build the results from the index entries themselves,
                // using the field names found on the base entry.
                var baseEntry  = searcher.Doc(baseDocId);
                var fieldNames = baseEntry.GetFields().Cast <AbstractField>().Select(field => field.Name).Distinct().ToArray();
                var indexEtag  = database.Indexes.GetIndexEtag(indexName, null);

                var otherHits   = hits.Where(hit => hit.Doc != baseDocId);
                var projections = otherHits.Select(hit =>
                {
                    var entry    = searcher.Doc(hit.Doc);
                    var keyField = index.IsMapReduce ? Constants.ReduceKeyFieldName : Constants.DocumentIdFieldName;

                    return new JsonDocument
                    {
                        DataAsJson = Index.CreateDocumentFromFields(entry, new FieldsToFetch(fieldNames, false, keyField)),
                        Etag = indexEtag
                    };
                });

                return projections.ToArray();
            }

            // A source document id was given: load the documents the other hits point to.
            var storedIds = hits
                            .Where(hit => hit.Doc != baseDocId)
                            .Select(hit => searcher.Doc(hit.Doc).Get(Constants.DocumentIdFieldName))
                            .Where(id => id != null)
                            .Distinct();

            var loadedDocuments = storedIds
                                  .Select(id => database.Documents.Get(id, null))
                                  .Where(document => document != null);

            return loadedDocuments.ToArray();
        }
        /// <summary>
        /// Creates the view model for the given index.
        /// </summary>
        /// <param name="index">The index this view model wraps.</param>
        /// <param name="database">The database the index belongs to.</param>
        /// <param name="parent">The screen assigned to <c>ParentRavenScreen</c>.</param>
        public IndexViewModel(Index index, IDatabase database, IRavenScreen parent)
        {
            _index = index;
            Database = database;

            ParentRavenScreen = parent;
            DisplayName = "Edit Index";
            // Runs after the members above are assigned; presumably resolves composition imports
            // declared on this instance — confirm against CompositionInitializer.
            CompositionInitializer.SatisfyImports(this);
        }
Exemple #4
0
        /// <summary>
        /// Projects every hit into a <c>JsonDocument</c> built from the fields of its index entry
        /// and stamped with the index etag.
        /// </summary>
        /// <param name="searcher">Searcher used to read the stored index entries.</param>
        /// <param name="index">The index instance; <c>IsMapReduce</c> selects which key field is passed to <c>FieldsToFetch</c>.</param>
        /// <param name="indexName">Name of the index, used to look up the index etag.</param>
        /// <param name="scoreDocs">The hits to convert.</param>
        /// <returns>One document per hit, or an empty array when there are no hits.</returns>
        private JsonDocument[] GetJsonDocuments(IndexSearcher searcher, Index index, string indexName, ScoreDoc[] scoreDocs)
        {
            if (scoreDocs.Any() == false)
            {
                return new JsonDocument[0];
            }

            // There is no base document here, so the field names come from the first hit.
            var firstEntry = searcher.Doc(scoreDocs.First().Doc);
            var fieldNames = firstEntry.GetFields().Cast <AbstractField>().Select(field => field.Name).Distinct().ToArray();
            var indexEtag  = database.Indexes.GetIndexEtag(indexName, null);

            var projections =
                from hit in scoreDocs
                select new JsonDocument
                {
                    DataAsJson = Index.CreateDocumentFromFields(
                        searcher.Doc(hit.Doc),
                        new FieldsToFetch(fieldNames, false, index.IsMapReduce ? Constants.ReduceKeyFieldName : Constants.DocumentIdFieldName)),
                    Etag = indexEtag
                };

            return projections.ToArray();
        }
        /// <summary>
        /// Converts the documents to JSON, running them through the results transformer named by
        /// <c>query.ResultsTransformer</c> when one is specified.
        /// </summary>
        /// <param name="query">The query; only <c>ResultsTransformer</c> is read here.</param>
        /// <param name="documents">The documents to convert.</param>
        /// <param name="token">Cancellation token handed to the <c>RobustEnumerator</c>.</param>
        /// <returns>A lazily evaluated sequence of JSON results.</returns>
        /// <exception cref="InvalidOperationException">
        /// A transformer name was given but no transformer (or no transform definition) was found for it.
        /// </exception>
        private IEnumerable <RavenJObject> ProcessResults(MoreLikeThisQuery query, IEnumerable <JsonDocument> documents, CancellationToken token)
        {
            IndexingFunc transformFunc = null;

            var transformerRequested = string.IsNullOrEmpty(query.ResultsTransformer) == false;
            if (transformerRequested)
            {
                var transformer = database.IndexDefinitionStorage.GetTransformer(query.ResultsTransformer);

                if (transformer == null || transformer.TransformResultsDefinition == null)
                {
                    throw new InvalidOperationException("The transformer " + query.ResultsTransformer + " was not found");
                }

                transformFunc = transformer.TransformResultsDefinition;
            }

            if (transformFunc == null)
            {
                // No transformer: plain conversion of each document.
                return(documents.Select(document => document.ToJson()));
            }

            // NOTE(review): errors raised while transforming are collected in this list, but the
            // list is not returned or logged by this method.
            var transformerErrors = new List <string>();
            var robustEnumerator  = new RobustEnumerator(token, 100)
            {
                OnError = (exception, o) =>
                {
                    var docKey = Index.TryGetDocKey(o);
                    transformerErrors.Add(string.Format("Doc '{0}', Error: {1}", docKey, exception.Message));
                }
            };

            var dynamicDocuments = documents.Select(document => new DynamicJsonObject(document.ToJson()));

            return(robustEnumerator
                   .RobustEnumeration(dynamicDocuments.GetEnumerator(), transformFunc)
                   .Select(JsonExtensions.ToJObject));
        }
		/// <summary>
		/// Creates a suggestion extension for one field of an index, backed by a spell checker
		/// whose directory is either in memory or on disk.
		/// </summary>
		/// <param name="indexInstance">The index this extension belongs to.</param>
		/// <param name="workContext">The work context kept by the extension.</param>
		/// <param name="key">Directory path opened for the spell checker when not running in memory.</param>
		/// <param name="distanceType">String distance implementation handed to the spell checker.</param>
		/// <param name="isRunInMemory">When true a <c>RAMDirectory</c> is used instead of the file system.</param>
		/// <param name="field">The field suggestions are produced for.</param>
		/// <param name="accuracy">Accuracy set on the spell checker.</param>
		public SuggestionQueryIndexExtension(Index indexInstance, WorkContext workContext, string key, 
			StringDistance distanceType, bool isRunInMemory, string field, float accuracy)
		{
			_indexInstance = indexInstance;
			this.workContext = workContext;
			this.field = field;

			if (isRunInMemory == false)
			{
				// Persistent suggestions live in the directory named by the key.
				directory = FSDirectory.Open(new DirectoryInfo(key));
			}
			else
			{
				directory = new RAMDirectory();
			}

			var checker = new SpellChecker.Net.Search.Spell.SpellChecker(directory, null);
			spellChecker = checker;
			checker.SetAccuracy(accuracy);
			checker.setStringDistance(distanceType);

			_operationText = "Suggestions for " + field + " " + distanceType + " (" + accuracy + ")";
		}
Exemple #7
0
			/// <summary>
			/// Captures everything needed to run a single query against <paramref name="parent"/>.
			/// </summary>
			/// <param name="parent">The index the query runs against.</param>
			/// <param name="indexQuery">The query to execute.</param>
			/// <param name="shouldIncludeInResults">Predicate stored for filtering results.</param>
			/// <param name="fieldsToFetch">The fields to fetch; also tells whether this is a distinct query.</param>
			/// <param name="indexQueryTriggers">The query triggers stored for this operation.</param>
			public IndexQueryOperation(Index parent, IndexQuery indexQuery, Func<IndexQueryResult, bool> shouldIncludeInResults, FieldsToFetch fieldsToFetch, OrderedPartCollection<AbstractIndexQueryTrigger> indexQueryTriggers)
			{
				this.parent = parent;
				this.indexQuery = indexQuery;
				this.shouldIncludeInResults = shouldIncludeInResults;
				this.fieldsToFetch = fieldsToFetch;
				this.indexQueryTriggers = indexQueryTriggers;

				// The set of already returned results is only allocated for distinct queries.
				if (fieldsToFetch.IsDistinctQuery == false)
				{
					return;
				}

				alreadyReturned = new HashSet<RavenJObject>(new RavenJTokenEqualityComparer());
			}
        /// <summary>
        /// Runs a "more like this" search against the given index and writes the result (or an
        /// error / not-modified status) to the HTTP response.
        /// </summary>
        /// <remarks>
        /// The base index entry is located from <c>parameters.DocumentId</c> and/or
        /// <c>parameters.MapGroupFields</c>. A 404 is written when the base entry or the stop words
        /// document cannot be found, and a 304 when the computed etag matches the request.
        /// </remarks>
        /// <param name="context">The HTTP context to read the request from and write the response to.</param>
        /// <param name="indexName">Name of the index to search.</param>
        /// <param name="index">The index instance, used to create the analyzer.</param>
        /// <param name="parameters">The more-like-this parameters.</param>
        private void PerformSearch(IHttpContext context, string indexName, Index index, MoreLikeThisQueryParameters parameters)
        {
            IndexSearcher searcher;

            using (Database.IndexStorage.GetCurrentIndexSearcher(indexName, out searcher))
            {
                var documentQuery = new BooleanQuery();

                if (!string.IsNullOrEmpty(parameters.DocumentId))
                {
                    documentQuery.Add(new TermQuery(new Term(Constants.DocumentIdFieldName, parameters.DocumentId.ToLowerInvariant())),
                                      BooleanClause.Occur.MUST);
                }

                foreach (string key in parameters.MapGroupFields.Keys)
                {
                    documentQuery.Add(new TermQuery(new Term(key, parameters.MapGroupFields[key])),
                                      BooleanClause.Occur.MUST);
                }

                var td = searcher.Search(documentQuery, 1);

                // get the current Lucene docid for the given RavenDB doc ID
                if (td.ScoreDocs.Length == 0)
                {
                    context.SetStatusToNotFound();
                    context.WriteJson(new { Error = "Document " + parameters.DocumentId + " could not be found" });
                    return;
                }

                var ir  = searcher.GetIndexReader();
                var mlt = new RavenMoreLikeThis(ir);

                AssignParameters(mlt, parameters);

                if (!string.IsNullOrWhiteSpace(parameters.StopWordsDocumentId))
                {
                    var stopWordsDoc = Database.Get(parameters.StopWordsDocumentId, null);
                    if (stopWordsDoc == null)
                    {
                        context.SetStatusToNotFound();
                        context.WriteJson(
                            new
                        {
                            Error = "Stop words document " + parameters.StopWordsDocumentId + " could not be found"
                        });
                        return;
                    }
                    var stopWords = stopWordsDoc.DataAsJson.JsonDeserialization <StopWordsSetup>().StopWords;

                    // Group by the lower-cased form first: a plain ToDictionary throws on a duplicate
                    // key, which happens as soon as two stop words differ only by case or a word is
                    // listed twice. The first spelling of each word is kept as the value.
                    var stopWordsByLoweredForm = stopWords
                                                 .GroupBy(x => x.ToLower())
                                                 .ToDictionary(g => g.Key, g => g.First());
                    mlt.SetStopWords(new Hashtable(stopWordsByLoweredForm));
                }

                var fieldNames = parameters.Fields ?? GetFieldNames(ir);
                mlt.SetFieldNames(fieldNames);

                var toDispose = new List <Action>();
                PerFieldAnalyzerWrapper perFieldAnalyzerWrapper = null;
                try
                {
                    perFieldAnalyzerWrapper = index.CreateAnalyzer(new LowerCaseKeywordAnalyzer(), toDispose, true);
                    mlt.SetAnalyzer(perFieldAnalyzerWrapper);

                    var mltQuery = mlt.Like(td.ScoreDocs[0].doc);
                    var tsdc     = TopScoreDocCollector.create(context.GetPageSize(Database.Configuration.MaxPageSize), true);
                    searcher.Search(mltQuery, tsdc);
                    var hits          = tsdc.TopDocs().ScoreDocs;
                    var jsonDocuments = GetJsonDocuments(parameters, searcher, indexName, hits, td.ScoreDocs[0].doc);

                    var result = new MultiLoadResult();

                    // The response etag is derived from the etags of every returned document,
                    // the index etag and the etags of all included documents.
                    var includedEtags = new List <byte>(jsonDocuments.SelectMany(x => x.Etag.Value.ToByteArray()));
                    includedEtags.AddRange(Database.GetIndexEtag(indexName, null).ToByteArray());
                    var loadedIds          = new HashSet <string>(jsonDocuments.Select(x => x.Key));
                    var addIncludesCommand = new AddIncludesCommand(Database, GetRequestTransaction(context), (etag, includedDoc) =>
                    {
                        includedEtags.AddRange(etag.ToByteArray());
                        result.Includes.Add(includedDoc);
                    }, context.Request.QueryString.GetValues("include") ?? new string[0], loadedIds);

                    foreach (var jsonDocument in jsonDocuments)
                    {
                        result.Results.Add(jsonDocument.ToJson());
                        addIncludesCommand.Execute(jsonDocument.DataAsJson);
                    }

                    Guid computedEtag;
                    using (var md5 = MD5.Create())
                    {
                        var computeHash = md5.ComputeHash(includedEtags.ToArray());
                        computedEtag = new Guid(computeHash);
                    }

                    if (context.MatchEtag(computedEtag))
                    {
                        context.SetStatusToNotModified();
                        return;
                    }

                    context.Response.AddHeader("ETag", computedEtag.ToString());
                    context.WriteJson(result);
                }
                finally
                {
                    // Release the analyzer and whatever CreateAnalyzer registered for disposal,
                    // even when the search or the response writing failed.
                    if (perFieldAnalyzerWrapper != null)
                    {
                        perFieldAnalyzerWrapper.Close();
                    }
                    foreach (var action in toDispose)
                    {
                        action();
                    }
                }
            }
        }
Exemple #9
0
		/// <summary>
		/// Re-creates the suggestion extensions of an index from the directories found under
		/// "Raven-Suggestions/&lt;indexName&gt;" in the index storage path.
		/// </summary>
		/// <remarks>
		/// Each directory name is URL-decoded and split on its last two dashes into
		/// field, distance type and accuracy.
		/// </remarks>
		/// <param name="indexName">Name of the index whose suggestion directories are scanned.</param>
		/// <param name="indexImplementation">The index the extensions are registered on.</param>
		private void LoadExistingSuggesionsExtentions(string indexName, Index indexImplementation)
		{
			var suggestionsForIndex = Path.Combine(configuration.IndexStoragePath, "Raven-Suggestions", indexName);
			if (!Directory.Exists(suggestionsForIndex))
				return;

			foreach (var directory in Directory.GetDirectories(suggestionsForIndex))
			{
				IndexSearcher searcher;
				using (indexImplementation.GetSearcher(out searcher))
				{
					var key = Path.GetFileName(directory);
					var decodedKey = MonoHttpUtility.UrlDecode(key);
					var lastIndexOfDash = decodedKey.LastIndexOf('-');
					// The accuracy is part of a directory name, i.e. machine-readable data: parse it with
					// the invariant culture so a comma-decimal current culture cannot misread "0.5".
					var accuracy = float.Parse(decodedKey.Substring(lastIndexOfDash + 1),
											   System.Globalization.CultureInfo.InvariantCulture);
					var lastIndexOfDistance = decodedKey.LastIndexOf('-', lastIndexOfDash - 1);
					StringDistanceTypes distanceType;
					// The result is deliberately not checked: on failure distanceType keeps its default value.
					Enum.TryParse(decodedKey.Substring(lastIndexOfDistance + 1, lastIndexOfDash - lastIndexOfDistance - 1),
								  true, out distanceType);
					var field = decodedKey.Substring(0, lastIndexOfDistance);
					var extension = new SuggestionQueryIndexExtension(
						Path.Combine(configuration.IndexStoragePath, "Raven-Suggestions", indexName, key), searcher.IndexReader,
						SuggestionQueryRunner.GetStringDistance(distanceType),
						field,
						accuracy);
					indexImplementation.SetExtension(key, extension);
				}
			}
		}
Exemple #10
0
 /// <summary>
 /// This implementation never skips an index.
 /// </summary>
 /// <param name="index">The index being considered (not inspected).</param>
 /// <returns>Always <c>false</c>.</returns>
 protected override bool ShouldSkipIndex(Index index)
 {
     const bool skip = false;
     return skip;
 }
Exemple #11
0
        /// <summary>
        /// Starts a long-running background task that applies a precomputed indexing batch to a
        /// newly created index, unless the index is not restricted to specific entity names.
        /// </summary>
        /// <param name="index">The new index; its <c>IsMapIndexingInProgress</c> flag is cleared when the work ends or is skipped.</param>
        /// <param name="definition">The index definition, used to look up the view generator.</param>
        private void TryApplyPrecomputedBatchForNewIndex(Index index, IndexDefinition definition)
        {
            var viewGenerator = IndexDefinitionStorage.GetViewGenerator(definition.IndexId);

            var coversAllDocuments = viewGenerator.ForEntityNames.Count == 0;
            if (coversAllDocuments)
            {
                // Nothing to narrow the batch down with: the index would receive every document,
                // so the precomputed batch brings no benefit.
                index.IsMapIndexingInProgress = false;
                return;
            }

            try
            {
                var batchTask = Task.Factory.StartNew(
                    () => ApplyPrecomputedBatchForNewIndex(index, viewGenerator),
                    TaskCreationOptions.LongRunning);

                batchTask.ContinueWith(completed =>
                {
                    if (completed.IsFaulted)
                    {
                        Log.Warn("Could not apply precomputed batch for index " + index, completed.Exception);
                    }

                    // Whether the batch succeeded or not, regular indexing may take over now.
                    index.IsMapIndexingInProgress = false;
                    WorkContext.ShouldNotifyAboutWork(() => "Precomputed indexing batch for " + index.PublicName + " is completed");
                    WorkContext.NotifyAboutWork();
                });
            }
            catch (Exception)
            {
                // The task could not even be scheduled: clear the flag before propagating.
                index.IsMapIndexingInProgress = false;
                throw;
            }
        }
        /// <summary>
        /// Prepares (but does not start) the task that applies a precomputed indexing batch to a
        /// newly created index.
        /// </summary>
        /// <param name="index">The new index; its <c>IsMapIndexingInProgress</c> flag is cleared on every exit path that does not hand out a task, and when the task finishes.</param>
        /// <param name="definition">The index definition, used to look up the view generator.</param>
        /// <returns>
        /// <c>null</c> when the optimization is not applied; otherwise a delegate that starts the
        /// task, registers it with <c>Database.Tasks</c> and returns the id assigned to it.
        /// </returns>
        private Func<long> TryCreateTaskForApplyingPrecomputedBatchForNewIndex(Index index, IndexDefinition definition)
        {
            if (Database.Configuration.MaxPrecomputedBatchSizeForNewIndex <= 0) //precaution -> should never be lower than 0
            {
                index.IsMapIndexingInProgress = false;
                return null;
            }

            var generator = IndexDefinitionStorage.GetViewGenerator(definition.IndexId);
            if (generator.ForEntityNames.Count == 0 && index.IsTestIndex == false)
            {
                // we don't optimize if we don't have what to optimize _on_, we know this is going to return all docs.
                // no need to try to optimize that, then
                index.IsMapIndexingInProgress = false;
                return null;
            }

            //only one precomputed batch can run at a time except for test indexes
            if (index.IsTestIndex == false)
            {
                // The check and the set of the "running" flag happen under the same lock so that
                // two callers cannot both claim the single slot.
                lock (precomputedLock)
                {

                    if (isPrecomputedBatchForNewIndexIsRunning)
                    {
                        index.IsMapIndexingInProgress = false;
                        return null;
                    }

                    isPrecomputedBatchForNewIndexIsRunning = true;
                }
            }

            try
            {
                var cts = new CancellationTokenSource();
                var task = new Task(() =>
                {
                    try
                    {
                        // Test indexes use their own item limit instead of the regular batch size.
                        ApplyPrecomputedBatchForNewIndex(index, generator,
                            index.IsTestIndex == false ?
                                Database.Configuration.MaxPrecomputedBatchSizeForNewIndex :
                                Database.Configuration.Indexing.MaxNumberOfItemsToProcessInTestIndexes, cts);
                    }
                    catch (TotalDataSizeExceededException e)
                    {
                        // Logged and rethrown, so the task ends up faulted for this exception type.
                        Log.Warn(string.Format(
                            @"Aborting applying precomputed batch for index {0}, 
                                because total data size gatherered exceeded 
                                configured data size ({1} bytes)", 
                            index, Database.Configuration.MaxPrecomputedBatchTotalDocumentSizeInBytes) , e);
                        throw;
                    }
                    catch (Exception e)
                    {
                        // Any other failure is only logged; the task itself completes normally.
                        Log.Warn("Could not apply precomputed batch for index " + index, e);
                    }
                    finally
                    {
                        // Release the slot (non-test indexes only), clear the in-progress flag and
                        // signal that there is work to look at.
                        if (index.IsTestIndex == false)
                            isPrecomputedBatchForNewIndexIsRunning = false;
                        index.IsMapIndexingInProgress = false;
                        WorkContext.ShouldNotifyAboutWork(() => "Precomputed indexing batch for " + index.PublicName + " is completed");
                        WorkContext.NotifyAboutWork();
                    }
                }, TaskCreationOptions.LongRunning);

                // The caller decides when to actually start the task by invoking this delegate.
                return () =>
                {
                    try
                    {
                        task.Start();

                        long id;
                        Database
                            .Tasks
                            .AddTask(
                                task,
                                new TaskBasedOperationState(task),
                                new TaskActions.PendingTaskDescription
                                {
                                    StartTime = DateTime.UtcNow,
                                    Payload = index.PublicName,
                                    TaskType = TaskActions.PendingTaskType.NewIndexPrecomputedBatch
                                },
                                out id,
                                cts);
                        return id;
                    }
                    catch (Exception)
                    {
                        // Starting or registering the task failed: reset both flags before propagating.
                        index.IsMapIndexingInProgress = false;
                        if (index.IsTestIndex == false)
                            isPrecomputedBatchForNewIndexIsRunning = false;
                        throw;
                    }
                };
            }
            catch (Exception)
            {
                // Creating the task failed: reset both flags before propagating.
                index.IsMapIndexingInProgress = false;
                if (index.IsTestIndex == false)
                    isPrecomputedBatchForNewIndexIsRunning = false;
                throw;
            }
        }
Exemple #13
0
		/// <summary>
		/// Compares the etag recorded in the index commit data with the last etag stored for the
		/// index, and resets the last indexed etag when the commit data is behind.
		/// </summary>
		/// <param name="commitData">The commit data; the "LastEtag" entry is read when present. May be null.</param>
		/// <param name="indexDefinition">Definition of the index, passed on to the reset.</param>
		/// <param name="index">The index whose stored etag is compared.</param>
		private void CheckMapIndexState(IDictionary<string, string> commitData, IndexDefinition indexDefinition, Index index)
		{
			Etag lastEtag = null;
			string rawEtag;
			var hasCommittedEtag = commitData != null && commitData.TryGetValue("LastEtag", out rawEtag);
			if (hasCommittedEtag)
			{
				// A value that cannot be parsed leaves lastEtag null, handled below.
				Etag.TryParse(rawEtag, out lastEtag);
			}

			var lastStoredEtag = GetLastEtagForIndex(index) ?? Etag.Empty;
			lastEtag = lastEtag ?? Etag.Empty;

			var commitIsBehind = EtagUtil.IsGreaterThanOrEqual(lastEtag, lastStoredEtag) == false;
			if (commitIsBehind)
			{
				ResetLastIndexedEtag(indexDefinition, lastEtag, SystemTime.UtcNow);
			}
		}
Exemple #14
0
			/// <summary>
			/// Captures everything needed to run a single query against <paramref name="parent"/>.
			/// </summary>
			/// <param name="parent">The index the query runs against.</param>
			/// <param name="indexQuery">The query to execute.</param>
			/// <param name="shouldIncludeInResults">Predicate stored for filtering results.</param>
			/// <param name="fieldsToFetch">The fields to fetch; also tells whether this is a distinct query.</param>
			public IndexQueryOperation(
				Index parent,
				IndexQuery indexQuery,
				Func<IndexQueryResult, bool> shouldIncludeInResults,
				FieldsToFetch fieldsToFetch)
			{
				this.parent = parent;
				this.indexQuery = indexQuery;
				this.shouldIncludeInResults = shouldIncludeInResults;
				this.fieldsToFetch = fieldsToFetch;

				// The set of already returned results is only allocated for distinct queries.
				if (fieldsToFetch.IsDistinctQuery == false)
				{
					return;
				}

				alreadyReturned = new HashSet<JObject>(new JTokenEqualityComparer());
			}
Exemple #15
0
        /// <summary>
        /// Loads the documents matching a new index in one storage batch and hands them to the
        /// indexing executer as a precomputed batch.
        /// </summary>
        /// <remarks>
        /// For non-test indexes the optimization is abandoned (by cancelling <paramref name="cts"/>)
        /// when the query reports more results than <paramref name="pageSize"/>.
        /// </remarks>
        /// <param name="index">The new index to populate.</param>
        /// <param name="generator">View generator used to build the query for the matching documents.</param>
        /// <param name="pageSize">Maximum number of documents to load.</param>
        /// <param name="cts">Cancellation source linked with the work context's token for the query and the indexing.</param>
        /// <exception cref="TotalDataSizeExceededException">
        /// The loaded documents exceed the configured total size, or available memory dropped
        /// below the configured processing limit.
        /// </exception>
        private void ApplyPrecomputedBatchForNewIndex(Index index, AbstractViewGenerator generator, int pageSize, CancellationTokenSource cts)
        {
            PrecomputedIndexingBatch result = null;

            var docsToIndex = new List<JsonDocument>();
            TransactionalStorage.Batch(actions =>
            {
                var query = GetQueryForAllMatchingDocumentsForIndex(generator);

                using (DocumentCacher.SkipSetDocumentsInDocumentCache())
                using (var linked = CancellationTokenSource.CreateLinkedTokenSource(cts.Token, WorkContext.CancellationToken))
                using (var op = new QueryActions.DatabaseQueryOperation(Database, Constants.DocumentsByEntityNameIndex, new IndexQuery
                {
                    Query = query,
                    PageSize = pageSize
                }, actions, linked)
                {
                    ShouldSkipDuplicateChecking = true
                })
                {
                    op.Init();

                    //if we are working on a test index, apply the optimization anyway, as the index is capped by small number of results
                    if (index.IsTestIndex == false && op.Header.TotalResults > pageSize)
                    {
                        // we don't apply this optimization if the total number of results 
                        // to index is more than the max numbers to index in a single batch. 
                        // The idea here is that we need to keep the amount
                        // of memory we use to a manageable level even when introducing a new index to a BIG 
                        // database
                        try
                        {
                            cts.Cancel();
                            // we have to run just a little bit of the query to properly setup the disposal
                            op.Execute(o => { });
                        }
                        catch (OperationCanceledException)
                        {
                            // expected: the query was cancelled on purpose just above
                        }
                        return;
                    }

                    if (Log.IsDebugEnabled)
                    {
                        Log.Debug("For new index {0}, using precomputed indexing batch optimization for {1} docs", 
                            index, op.Header.TotalResults);
                    }

                    // Byte counters are 64-bit: summing per-document sizes in a 32-bit int can
                    // overflow on large batches and silently defeat the size limit below.
                    long totalLoadedDocumentSize = 0;
                    const int totalSizeToCheck = 16 * 1024 * 1024; //16MB
                    long localLoadedDocumentSize = 0;
                    op.Execute(document =>
                    {
                        var metadata = document.Value<RavenJObject>(Constants.Metadata);
                        var key = metadata.Value<string>("@id");
                        var etag = Etag.Parse(metadata.Value<string>("@etag"));
                        var lastModified = DateTime.Parse(metadata.Value<string>(Constants.LastModified));
                        document.Remove(Constants.Metadata);
                        var serializedSizeOnDisk = metadata.Value<int>(Constants.SerializedSizeOnDisk);
                        metadata.Remove(Constants.SerializedSizeOnDisk);

                        var doc = new JsonDocument
                        {
                            DataAsJson = document,
                            Etag = etag,
                            Key = key,
                            SerializedSizeOnDisk = serializedSizeOnDisk,
                            LastModified = lastModified,
                            SkipDeleteFromIndex = true,
                            Metadata = metadata
                        };

                        docsToIndex.Add(doc);
                        totalLoadedDocumentSize += serializedSizeOnDisk;
                        localLoadedDocumentSize += serializedSizeOnDisk;

                        if (totalLoadedDocumentSize > Database.Configuration.MaxPrecomputedBatchTotalDocumentSizeInBytes)
                        {
                            // Both figures are reported in megabytes, as the message says.
                            var error = string.Format(
                                @"Aborting applying precomputed batch for index id: {0}, name: {1}
                                    because we have {2}mb of documents that were fetched
                                    and the configured max data to fetch is {3}mb",
                                index.indexId, index.PublicName, totalLoadedDocumentSize/1024/1024, 
                                Database.Configuration.MaxPrecomputedBatchTotalDocumentSizeInBytes/1024/1024);

                            //we are aborting operation, so don't keep the references
                            docsToIndex.Clear(); 
                            throw new TotalDataSizeExceededException(error);
                        }


                        // The memory check below only runs once per totalSizeToCheck bytes loaded.
                        if (localLoadedDocumentSize <= totalSizeToCheck)
                            return;

                        localLoadedDocumentSize = 0;

                        if (Database.Configuration.MemoryLimitForProcessingInMb > MemoryStatistics.AvailableMemoryInMb)
                        {
                            var error = string.Format(
                                @"Aborting applying precomputed batch for index id: {0}, name: {1}
                                    because we have {2}mb of available memory and the available memory for processing is: {3}mb",
                                index.indexId, index.PublicName, 
                                MemoryStatistics.AvailableMemoryInMb, Database.Configuration.MemoryLimitForProcessingInMb);

                            //we are aborting operation, so don't keep the references
                            docsToIndex.Clear();
                            throw new TotalDataSizeExceededException(error);
                        }
                    });

                    result = new PrecomputedIndexingBatch
                    {
                        LastIndexed = op.Header.IndexEtag,
                        LastModified = op.Header.IndexTimestamp,
                        Documents = docsToIndex,
                        Index = index
                    };
                }
            });

            // Note: the count check admits an empty document list, so an empty batch is still indexed.
            if (result != null && result.Documents != null && result.Documents.Count >= 0)
            {
                using (var linked = CancellationTokenSource.CreateLinkedTokenSource(cts.Token, WorkContext.CancellationToken))
                {
                    Database.IndexingExecuter.IndexPrecomputedBatch(result, linked.Token);

                    if (index.IsTestIndex)
                        TransactionalStorage.Batch(accessor => accessor.Indexing.TouchIndexEtag(index.IndexId));
                }
            }

        }
 /// <summary>
 /// Decides whether the given index should be skipped; each derived class supplies its own rule.
 /// </summary>
 /// <param name="index">The index being considered.</param>
 /// <returns><c>true</c> to skip the index — presumably excluding it from this component's processing; confirm against the callers.</returns>
 protected abstract bool ShouldSkipIndex(Index index);
Exemple #17
0
        /// <summary>
        /// Flushes the index up to the last etag reported by its stats and logs how long it took.
        /// </summary>
        /// <param name="value">The index to flush.</param>
        /// <param name="onlyAddIndexError">
        /// When true a flush failure is recorded on the index and swallowed; when false (the
        /// default) the failure is rethrown after being logged.
        /// </param>
        private static void FlushIndex(Index value, bool onlyAddIndexError = false)
        {
            var timer = Stopwatch.StartNew();

            try
            {
                var lastEtag = value.GetLastEtagFromStats();
                value.Flush(lastEtag);
            }
            catch (Exception flushError)
            {
                // The write error is counted and logged in both modes.
                value.IncrementWriteErrors(flushError);

                var message = string.Format("Failed to flush {0} index: {1} (id: {2})",
                    GetIndexType(value.IsMapReduce), value.PublicName, value.IndexId);
                log.WarnException(message, flushError);

                if (onlyAddIndexError == false)
                {
                    throw;
                }

                value.AddIndexFailedFlushError(flushError);
                return;
            }

            if (log.IsDebugEnabled == false)
            {
                return;
            }

            log.Debug("Flushed {0} index: {1} (id: {2}), took {3}ms",
                GetIndexType(value.IsMapReduce), value.PublicName, value.IndexId, timer.ElapsedMilliseconds);
        }
Exemple #18
0
		/// <summary>
		/// Skips test indexes.
		/// </summary>
		/// <param name="index">The index being considered.</param>
		/// <returns>The value of <c>index.IsTestIndex</c>.</returns>
		protected override bool ShouldSkipIndex(Index index)
		{
			var isTestIndex = index.IsTestIndex;
			return isTestIndex;
		}
		/// <summary>
		/// Turns the hits of a "more like this" search into documents, excluding the hit whose
		/// Lucene doc id equals <paramref name="baseDocId"/>.
		/// </summary>
		/// <param name="parameters">The query; only <c>DocumentId</c> is read here.</param>
		/// <param name="searcher">Searcher used to read the stored index entries.</param>
		/// <param name="index">The index instance; <c>IsMapReduce</c> selects which key field is passed to <c>FieldsToFetch</c>.</param>
		/// <param name="indexName">Name of the index, used to look up the index etag.</param>
		/// <param name="hits">The hits produced by the similarity query.</param>
		/// <param name="baseDocId">Lucene doc id of the entry the search started from.</param>
		/// <returns>Loaded documents when a document id was given, otherwise projections of the index entries.</returns>
		private JsonDocument[] GetJsonDocuments(MoreLikeThisQuery parameters, IndexSearcher searcher, Index index, string indexName, IEnumerable<ScoreDoc> hits, int baseDocId)
		{
			if (string.IsNullOrEmpty(parameters.DocumentId) == false)
			{
				// Resolve every other hit to the document id stored in its index entry,
				// then load the documents those ids point to.
				var storedIds =
					(from hit in hits
					 where hit.Doc != baseDocId
					 select searcher.Doc(hit.Doc).Get(Constants.DocumentIdFieldName)
					 into storedId
					 where storedId != null
					 select storedId).Distinct();

				var loadedDocuments =
					from docId in storedIds
					select database.Documents.Get(docId, null)
					into loaded
					where loaded != null
					select loaded;

				return loadedDocuments.ToArray();
			}

			// No source document id: project the results from the index entries themselves.
			var fieldNames =
				(from AbstractField field in searcher.Doc(baseDocId).GetFields()
				 select field.Name).Distinct().ToArray();
			var indexEtag = database.Indexes.GetIndexEtag(indexName, null);

			var projections =
				from hit in hits
				where hit.Doc != baseDocId
				select new JsonDocument
				{
					DataAsJson = Index.CreateDocumentFromFields(
						searcher.Doc(hit.Doc),
						new FieldsToFetch(fieldNames, false, index.IsMapReduce ? Constants.ReduceKeyFieldName : Constants.DocumentIdFieldName)),
					Etag = indexEtag
				};

			return projections.ToArray();
		}
Exemple #20
0
		/// <summary>
		/// Compares the etag recorded in the index commit data with the last etag stored for the
		/// index; when the commit data is behind, logs the fact and resets the last indexed etag.
		/// </summary>
		/// <param name="commitData">The commit data; the "LastEtag" entry is read when present. May be null.</param>
		/// <param name="indexDefinition">Definition of the index, used for logging and passed on to the reset.</param>
		/// <param name="index">The index whose stored etag is compared.</param>
		private void CheckMapIndexState(IDictionary<string, string> commitData, IndexDefinition indexDefinition, Index index)
		{
			Etag lastEtag = null;
			string rawEtag;
			var hasCommittedEtag = commitData != null && commitData.TryGetValue("LastEtag", out rawEtag);
			if (hasCommittedEtag)
			{
				// A value that cannot be parsed leaves lastEtag null, handled below.
				Etag.TryParse(rawEtag, out lastEtag);
			}

			var lastStoredEtag = GetLastEtagForIndex(index) ?? Etag.Empty;
			lastEtag = lastEtag ?? Etag.Empty;

			var commitIsBehind = EtagUtil.IsGreaterThanOrEqual(lastEtag, lastStoredEtag) == false;
			if (commitIsBehind)
			{
				var message = string.Format("Resetting index '{0} ({1})'. Last stored etag: {2}. Last commit etag: {3}.", indexDefinition.Name, index.indexId, lastStoredEtag, lastEtag);
				log.Info(message);

				ResetLastIndexedEtag(indexDefinition, lastEtag, SystemTime.UtcNow);
			}
		}
Exemple #21
0
		/// <summary>
		/// Re-creates the suggestion extensions of an index from the directories found under
		/// "Raven-Suggestions/&lt;indexName&gt;" in the index storage path.
		/// </summary>
		/// <remarks>
		/// Each directory name is URL-decoded and split on its last two dashes into field,
		/// distance type and accuracy. If anything fails, the whole suggestions directory of the
		/// index is deleted (best effort) and the original exception is rethrown.
		/// </remarks>
		/// <param name="indexName">Name of the index whose suggestion directories are scanned.</param>
		/// <param name="indexImplementation">The index the extensions are registered on.</param>
		private void LoadExistingSuggestionsExtentions(string indexName, Index indexImplementation)
		{
			var suggestionsRoot = Path.Combine(configuration.IndexStoragePath, "Raven-Suggestions", indexName);
			if (Directory.Exists(suggestionsRoot) == false)
			{
				return;
			}

			try
			{
				foreach (var extensionDirectory in Directory.GetDirectories(suggestionsRoot))
				{
					IndexSearcher searcher;
					using (indexImplementation.GetSearcher(out searcher))
					{
						var key = Path.GetFileName(extensionDirectory);
						var decodedKey = MonoHttpUtility.UrlDecode(key);

						// Layout of the decoded key: <field>-<distance type>-<accuracy>
						var accuracySeparator = decodedKey.LastIndexOf('-');
						var accuracyText = decodedKey.Substring(accuracySeparator + 1);
						var accuracy = float.Parse(accuracyText, CultureInfo.InvariantCulture);

						var distanceSeparator = decodedKey.LastIndexOf('-', accuracySeparator - 1);
						var distanceText = decodedKey.Substring(distanceSeparator + 1, accuracySeparator - distanceSeparator - 1);
						StringDistanceTypes distanceType;
						Enum.TryParse(distanceText, true, out distanceType);

						var field = decodedKey.Substring(0, distanceSeparator);

						var extensionPath = Path.Combine(configuration.IndexStoragePath, "Raven-Suggestions", indexName, key);
						var extension = new SuggestionQueryIndexExtension(
							indexImplementation,
							documentDatabase.WorkContext,
							extensionPath,
							SuggestionQueryRunner.GetStringDistance(distanceType),
							searcher.IndexReader.Directory() is RAMDirectory,
							field,
							accuracy);

						indexImplementation.SetExtension(key, extension);
					}
				}
			}
			catch (Exception e)
			{
				log.WarnException("Could not open suggestions for index " + indexName + ", resetting the index", e);
				try
				{
					IOExtensions.DeleteDirectory(suggestionsRoot);
				}
				catch (Exception)
				{
					// Deliberately ignored: the cleanup is best effort and the original
					// failure is the one rethrown below.
				}
				throw;
			}
		}
Exemple #22
0
        /// <summary>
        /// Runs <paramref name="query"/> against the named index inside a single storage batch and returns
        /// the matching documents together with staleness and index timestamp information.
        /// </summary>
        /// <param name="index">Index name; it is normalized with <c>IndexDefinitionStorage.FixupIndexName</c>.</param>
        /// <param name="query">Query to execute.</param>
        /// <returns>The query result, including the skipped and total counts read from <paramref name="query"/>.</returns>
        /// <exception cref="InvalidOperationException">
        /// No view generator exists for the index, or the transform results function reported errors.
        /// </exception>
        /// <exception cref="IndexDisabledException">The failure rate marks the index as invalid.</exception>
        public QueryResult Query(string index, IndexQuery query)
        {
            index = IndexDefinitionStorage.FixupIndexName(index);

            var documents = new List<JObject>();
            var isStale = false;
            Tuple<DateTime, Guid> lastUpdated = null;

            TransactionalStorage.Batch(actions =>
            {
                var viewGenerator = IndexDefinitionStorage.GetViewGenerator(index);
                if (viewGenerator == null)
                    throw new InvalidOperationException("Could not find index named: " + index);

                var entityName = viewGenerator.ForEntityName;
                isStale = actions.Staleness.IsIndexStale(index, query.Cutoff, entityName);
                lastUpdated = actions.Staleness.IndexLastUpdatedAt(index);

                var failureInformation = actions.Indexing.GetFailureRate(index);
                if (failureInformation.IsInvalidIndex)
                    throw new IndexDisabledException(failureInformation);

                var docRetriever = new DocumentRetriever(actions, ReadTriggers);
                var indexDefinition = GetIndexDefinition(index);

                // the third argument depends on whether the view generator has a reduce definition
                var fieldsToFetch = new FieldsToFetch(
                    query.FieldsToFetch,
                    query.AggregationOperation,
                    viewGenerator.ReduceDefinition == null
                        ? Abstractions.Data.Constants.DocumentIdFieldName
                        : Abstractions.Data.Constants.ReduceKeyFieldName);

                // lazily evaluated: index hits -> retrieved documents, dropping the ones that could not be retrieved
                var retrieved = IndexStorage
                    .Query(index, query, candidate => docRetriever.ShouldIncludeResultInQuery(candidate, indexDefinition, fieldsToFetch), fieldsToFetch)
                    .Select(queryResult => docRetriever.RetrieveDocumentForQuery(queryResult, indexDefinition, fieldsToFetch))
                    .Where(doc => doc != null);

                var transformerErrors = new List<string>();
                var applyTransform = query.SkipTransformResults == false &&
                                     viewGenerator.TransformResultsDefinition != null;

                IEnumerable<JObject> results;
                if (applyTransform == false)
                {
                    results = retrieved.Select(x => x.ToJson());
                }
                else
                {
                    // errors raised while transforming are collected instead of aborting the enumeration
                    var robustEnumerator = new RobustEnumerator
                    {
                        OnError = (exception, o) =>
                            transformerErrors.Add(string.Format("Doc '{0}', Error: {1}", Index.TryGetDocKey(o), exception.Message))
                    };
                    var dynamicJsonObjects = retrieved.Select(x => new DynamicJsonObject(x.ToJson())).ToArray();
                    results = robustEnumerator
                        .RobustEnumeration(dynamicJsonObjects, source => viewGenerator.TransformResultsDefinition(docRetriever, source))
                        .Select(JsonExtensions.ToJObject);
                }

                documents.AddRange(results);

                if (transformerErrors.Count > 0)
                    throw new InvalidOperationException("The transform results function failed.\r\n" + string.Join("\r\n", transformerErrors));
            });

            return new QueryResult
            {
                IndexName = index,
                Results = documents,
                IsStale = isStale,
                SkippedResults = query.SkippedResults.Value,
                TotalResults = query.TotalSize.Value,
                IndexTimestamp = lastUpdated.Item1,
                IndexEtag = lastUpdated.Item2
            };
        }
		/// <summary>
		/// Executes a "more like this" search against <paramref name="indexName"/> and writes the outcome
		/// to <paramref name="context"/>.
		/// </summary>
		/// <remarks>
		/// The base document is looked up by document id and/or the map group fields. A not-found status with
		/// an error body is written when the base document or the configured stop words document is missing,
		/// and a not-modified status when the computed ETag matches the request.
		/// </remarks>
		/// <param name="context">HTTP context the response is written to.</param>
		/// <param name="indexName">Name of the index to search.</param>
		/// <param name="index">Index used to create the per-field analyzer.</param>
		/// <param name="parameters">Parameters of the "more like this" query.</param>
		private void PerformSearch(IHttpContext context, string indexName, Index index, MoreLikeThisQueryParameters parameters)
		{
			IndexSearcher searcher;
			using (Database.IndexStorage.GetCurrentIndexSearcher(indexName, out searcher))
			{
				var documentQuery = new BooleanQuery();

				if (!string.IsNullOrEmpty(parameters.DocumentId))
				{
					documentQuery.Add(new TermQuery(new Term(Constants.DocumentIdFieldName, parameters.DocumentId)),
					                  Lucene.Net.Search.BooleanClause.Occur.MUST);
				}

				foreach (string key in parameters.MapGroupFields.Keys)
				{
					documentQuery.Add(new TermQuery(new Term(key, parameters.MapGroupFields[key])),
					                  Lucene.Net.Search.BooleanClause.Occur.MUST);
				}

				var td = searcher.Search(documentQuery, 1);

				// get the current Lucene docid for the given RavenDB doc ID
				if (td.ScoreDocs.Length == 0)
				{
					context.SetStatusToNotFound();
					context.WriteJson(new {Error = "Document " + parameters.DocumentId + " could not be found"});
					return;
				}

				var ir = searcher.GetIndexReader();
				var mlt = new RavenMoreLikeThis(ir);

				AssignParameters(mlt, parameters);

				if (!string.IsNullOrWhiteSpace(parameters.StopWordsDocumentId))
				{
					var stopWordsDoc = Database.Get(parameters.StopWordsDocumentId, null);
					if (stopWordsDoc == null)
					{
						context.SetStatusToNotFound();
						context.WriteJson(
							new
							{
								Error = "Stop words document " + parameters.StopWordsDocumentId + " could not be found"
							});
						return;
					}
					var stopWords = stopWordsDoc.DataAsJson.JsonDeserialization<StopWordsSetup>().StopWords;

					// Build the table entry by entry: ToDictionary throws ArgumentException as soon as two stop
					// words lower-case to the same key (a repeated word, or "The" next to "the"), which used to
					// fail the whole request. The first occurrence of each key is kept.
					var stopWordsTable = new Hashtable();
					foreach (var stopWord in stopWords)
					{
						var loweredStopWord = stopWord.ToLower();
						if (stopWordsTable.ContainsKey(loweredStopWord) == false)
							stopWordsTable.Add(loweredStopWord, stopWord);
					}
					mlt.SetStopWords(stopWordsTable);
				}

				var fieldNames = parameters.Fields ?? GetFieldNames(ir);
				mlt.SetFieldNames(fieldNames);

				var toDispose = new List<Action>();
				PerFieldAnalyzerWrapper perFieldAnalyzerWrapper = null;
				try
				{
					perFieldAnalyzerWrapper = index.CreateAnalyzer(new LowerCaseKeywordAnalyzer(), toDispose, true);
					mlt.SetAnalyzer(perFieldAnalyzerWrapper);

					// search for documents similar to the base document found above
					var baseDocId = td.ScoreDocs[0].doc;
					var mltQuery = mlt.Like(baseDocId);
					var tsdc = TopScoreDocCollector.create(context.GetPageSize(Database.Configuration.MaxPageSize), true);
					searcher.Search(mltQuery, tsdc);
					var hits = tsdc.TopDocs().ScoreDocs;
					var jsonDocuments = GetJsonDocuments(parameters, searcher, indexName, hits, baseDocId);

					var result = new MultiLoadResult();

					// the response ETag is derived from the etags of all results, the index etag and all includes
					var includedEtags = new List<byte>(jsonDocuments.SelectMany(x => x.Etag.Value.ToByteArray()));
					includedEtags.AddRange(Database.GetIndexEtag(indexName, null).ToByteArray());
					var loadedIds = new HashSet<string>(jsonDocuments.Select(x => x.Key));
					var addIncludesCommand = new AddIncludesCommand(Database, GetRequestTransaction(context), (etag, includedDoc) =>
					{
						includedEtags.AddRange(etag.ToByteArray());
						result.Includes.Add(includedDoc);
					}, context.Request.QueryString.GetValues("include") ?? new string[0], loadedIds);

					foreach (var jsonDocument in jsonDocuments)
					{
						result.Results.Add(jsonDocument.ToJson());
						addIncludesCommand.Execute(jsonDocument.DataAsJson);
					}

					Guid computedEtag;
					using (var md5 = MD5.Create())
					{
						var computeHash = md5.ComputeHash(includedEtags.ToArray());
						computedEtag = new Guid(computeHash);
					}

					if (context.MatchEtag(computedEtag))
					{
						context.SetStatusToNotModified();
						return;
					}

					context.Response.AddHeader("ETag", computedEtag.ToString());
					context.WriteJson(result);
				}
				finally
				{
					if (perFieldAnalyzerWrapper != null)
						perFieldAnalyzerWrapper.Close();
					foreach (var action in toDispose)
					{
						action();
					}
				}
			}
		}
Exemple #24
0
		/// <summary>
		/// Reports whether the given index should be skipped; this override always answers <c>false</c>.
		/// </summary>
		/// <param name="index">The index being considered (not inspected by this override).</param>
		/// <returns>Always <c>false</c>.</returns>
		protected override bool ShouldSkipIndex(Index index)
		{
			return false;
		}
Exemple #25
0
 /// <summary>
 /// Reports whether the given index should be skipped: true for a test index and for an index whose
 /// map indexing is already in progress.
 /// </summary>
 /// <param name="index">The index being considered.</param>
 /// <returns><c>true</c> when the index is to be skipped; otherwise <c>false</c>.</returns>
 protected override bool ShouldSkipIndex(Index index)
 {
     if (index.IsTestIndex)
         return true;

     // precomputed? slow? it is already running, nothing to do with it for now
     return index.IsMapIndexingInProgress;
 }
Exemple #26
0
	    /// <summary>
	    /// Reads the commit user data of <paramref name="directory"/> and hands it to the map-reduce or the
	    /// map state check, depending on the kind of <paramref name="index"/>. Does nothing when
	    /// <c>ResetIndexOnUncleanShutdown</c> is switched off in the configuration.
	    /// </summary>
	    /// <param name="directory">Lucene directory of the index.</param>
	    /// <param name="indexDefinition">Definition of the index; used by the map index check.</param>
	    /// <param name="index">Index instance being checked.</param>
	    /// <param name="resetTried">Forwarded to the map-reduce check.</param>
	    private void CheckIndexState(Lucene.Net.Store.Directory directory, IndexDefinition indexDefinition, Index index, bool resetTried)
	    {
	        if (configuration.ResetIndexOnUncleanShutdown == false)
	            return;

	        // 1. A null commit data means there were no commits, so just in case we reset to Etag.Empty
	        // 2. Commit data without 'LastEtag' is considered an invalid index
	        // 3. A present (and valid) 'LastEtag' is reset to, if it is lower than the last stored etag
	        var userData = IndexReader.GetCommitUserData(directory);

	        if (!index.IsMapReduce)
	        {
	            CheckMapIndexState(userData, indexDefinition, index);
	            return;
	        }

	        CheckMapReduceIndexState(userData, resetTried);
	    }
Exemple #27
0
			/// <summary>
			/// Creates the recorder, storing every argument in the field of the same name.
			/// </summary>
			public DuplicateDocumentRecorder(
				Searchable indexSearcher,
				Index parent,
				HashSet<string> documentsAlreadySeenInPreviousPage,
				HashSet<RavenJObject> alreadyReturned,
				FieldsToFetch fieldsToFetch,
				bool isProjectionOrMapReduce)
			{
				// plain field assignments, listed in parameter order
				this.indexSearcher = indexSearcher;
				this.parent = parent;
				this.documentsAlreadySeenInPreviousPage = documentsAlreadySeenInPreviousPage;
				this.alreadyReturned = alreadyReturned;
				this.fieldsToFetch = fieldsToFetch;
				this.isProjectionOrMapReduce = isProjectionOrMapReduce;
			}
Exemple #28
0
		/// <summary>
		/// Tells whether the public name of <paramref name="index"/> starts with "Auto/" and its priority is
		/// <c>IndexingPriority.Idle</c>.
		/// </summary>
		/// <param name="index">The index to inspect.</param>
		/// <returns><c>true</c> when both conditions hold; otherwise <c>false</c>.</returns>
		private static bool IsIdleAutoIndex(Index index)
		{
			// Ordinal comparison: "Auto/" is a fixed identifier prefix, so the match must not depend on the
			// current culture of the calling thread (the single-argument StartsWith is culture-sensitive).
			return index.PublicName.StartsWith("Auto/", System.StringComparison.Ordinal) && index.Priority == IndexingPriority.Idle;
		}
Exemple #29
0
        /// <summary>
        /// Prepares the long-running task that applies a precomputed indexing batch to a new index.
        /// </summary>
        /// <remarks>
        /// Returns null, after clearing <c>IsMapIndexingInProgress</c> on the index, when the view generator
        /// has no entity names and the index is not a test index, or when another precomputed batch is
        /// already running. Only one precomputed batch may run at a time; the running flag is taken under
        /// <c>precomputedLock</c> and released when the task finishes or fails to start.
        /// </remarks>
        /// <param name="index">The new index.</param>
        /// <param name="definition">Definition of the new index; its id selects the view generator.</param>
        /// <returns>An action that starts the task and registers it with the database tasks, or null.</returns>
        private Action TryCreateTaskForApplyingPrecomputedBatchForNewIndex(Index index, IndexDefinition definition)
        {
            var generator = IndexDefinitionStorage.GetViewGenerator(definition.IndexId);
            if (generator.ForEntityNames.Count == 0 && index.IsTestIndex == false)
            {
                // we don't optimize if we don't have what to optimize _on_, we know this is going to return all docs.
                // no need to try to optimize that, then
                index.IsMapIndexingInProgress = false;
                return null;
            }

            lock (precomputedLock)
            {
                if (isPrecomputedBatchForNewIndexIsRunning)
                {
                    index.IsMapIndexingInProgress = false;
                    return null;
                }

                isPrecomputedBatchForNewIndexIsRunning = true;
            }

            try
            {
                var cts = new CancellationTokenSource();
                var task = new Task(() =>
                {
                    try
                    {
                        ApplyPrecomputedBatchForNewIndex(index, generator, index.IsTestIndex == false ? Database.Configuration.MaxNumberOfItemsToProcessInSingleBatch : Database.Configuration.Indexing.MaxNumberOfItemsToProcessInTestIndexes, cts);
                    }
                    catch (Exception e)
                    {
                        // WarnException records the exception itself; Warn(message, e) would hand the
                        // exception over as a format argument of a message without placeholders.
                        Log.WarnException("Could not apply precomputed batch for index " + index, e);
                    }
                    finally
                    {
                        // release the flags and wake up the indexing work, whatever the outcome was
                        isPrecomputedBatchForNewIndexIsRunning = false;
                        index.IsMapIndexingInProgress = false;
                        WorkContext.ShouldNotifyAboutWork(() => "Precomputed indexing batch for " + index.PublicName + " is completed");
                        WorkContext.NotifyAboutWork();
                    }
                }, TaskCreationOptions.LongRunning);

                return () =>
                {
                    try
                    {
                        task.Start();

                        long id;
                        Database
                            .Tasks
                            .AddTask(
                                task,
                                new TaskBasedOperationState(task),
                                new TaskActions.PendingTaskDescription
                                {
                                    StartTime = DateTime.UtcNow,
                                    Payload = index.PublicName,
                                    TaskType = TaskActions.PendingTaskType.NewIndexPrecomputedBatch
                                },
                                out id,
                                cts);
                    }
                    catch (Exception)
                    {
                        // starting or registering failed: release the flags so a later attempt is possible
                        index.IsMapIndexingInProgress = false;
                        isPrecomputedBatchForNewIndexIsRunning = false;
                        throw;
                    }
                };
            }
            catch (Exception)
            {
                index.IsMapIndexingInProgress = false;
                isPrecomputedBatchForNewIndexIsRunning = false;
                throw;
            }
        }
Exemple #30
0
        /// <summary>
        /// Reads the last indexed etag of <paramref name="index"/> from the index stats.
        /// </summary>
        /// <param name="index">The index whose stats are read.</param>
        /// <returns>
        /// Null for a map-reduce index; <c>Etag.Empty</c> when no stats are returned; otherwise the
        /// <c>LastIndexedEtag</c> of the stats.
        /// </returns>
        internal Etag GetLastEtagForIndex(Index index)
        {
            if (index.IsMapReduce)
            {
                return null;
            }

            IndexStats indexStats = null;
            documentDatabase.TransactionalStorage.Batch(accessor => indexStats = accessor.Indexing.GetIndexStats(index.IndexId));

            if (indexStats == null)
            {
                return Etag.Empty;
            }

            return indexStats.LastIndexedEtag;
        }
Exemple #31
0
	    /// <summary>
	    /// Creates a suggestion extension for every entry of <c>definition.Suggestions</c> whose distance is
	    /// not <c>StringDistanceTypes.None</c> and registers it on the index storage under a key built from
	    /// field, distance and accuracy.
	    /// </summary>
	    /// <param name="name">Index name; used for the storage path and for registering the extension.</param>
	    /// <param name="definition">Index definition providing the suggestion options.</param>
	    /// <param name="index">Index the extensions are created for.</param>
	    private void InvokeSuggestionIndexing(string name, IndexDefinition definition, Index index)
	    {
	        foreach (var entry in definition.Suggestions)
	        {
	            var field = entry.Key;
	            var options = entry.Value;

	            if (options.Distance != StringDistanceTypes.None)
	            {
	                // NOTE(review): the accuracy is turned into text by plain concatenation, which presumably
	                // uses the current culture - confirm this matches how such keys are parsed back elsewhere.
	                var rawKey = field + "-" + options.Distance + "-" + options.Accuracy;
	                var extensionKey = MonoHttpUtility.UrlEncode(rawKey);

	                var extensionPath = Path.Combine(Database.Configuration.IndexStoragePath, "Raven-Suggestions", name, extensionKey);
	                var distance = SuggestionQueryRunner.GetStringDistance(options.Distance);

	                var extension = new SuggestionQueryIndexExtension(
	                    index,
	                    WorkContext,
	                    extensionPath,
	                    distance,
	                    Database.Configuration.RunInMemory,
	                    field,
	                    options.Accuracy);

	                Database.IndexStorage.SetIndexExtension(name, extensionKey, extension);
	            }
	        }
	    }
Exemple #32
0
        /// <summary>
        /// Collects, through the "Raven/DocumentsByEntityName" index, the documents belonging to the entity
        /// names of <paramref name="generator"/> and indexes them into <paramref name="index"/> as one
        /// precomputed batch.
        /// </summary>
        /// <remarks>
        /// Nothing is indexed when the query reports no results, when it reports more results than
        /// <c>MaxNumberOfItemsToProcessInSingleBatch</c>, or when it delivers no documents.
        /// </remarks>
        /// <param name="index">The new index receiving the batch.</param>
        /// <param name="generator">View generator whose entity names select the documents.</param>
        private void ApplyPrecomputedBatchForNewIndex(Index index, AbstractViewGenerator generator)
        {
            const string DocumentsByEntityNameIndex = "Raven/DocumentsByEntityName";

            PrecomputedIndexingBatch result = null;

            var docsToIndex = new List<JsonDocument>();
            TransactionalStorage.Batch(actions =>
            {
                var tags = generator.ForEntityNames.Select(entityName => "Tag:[[" + entityName + "]]").ToList();

                var query = string.Join(" OR ", tags);

                JsonDocument highestByEtag = null;

                // the local token source is disposed together with the linked source and the operation
                using (var cts = new CancellationTokenSource())
                using (var linked = CancellationTokenSource.CreateLinkedTokenSource(cts.Token, WorkContext.CancellationToken))
                using (var op = new QueryActions.DatabaseQueryOperation(Database, DocumentsByEntityNameIndex, new IndexQuery
                {
                    Query = query,
                    PageSize = Database.Configuration.MaxNumberOfItemsToProcessInSingleBatch
                }, actions, linked)
                {
                    ShouldSkipDuplicateChecking = true
                })
                {
                    op.Init();
                    if (op.Header.TotalResults == 0 ||
                        (op.Header.TotalResults > Database.Configuration.MaxNumberOfItemsToProcessInSingleBatch))
                    {
                        // we don't apply this optimization if the total number of results
                        // to index is more than the max numbers to index in a single batch.
                        // The idea here is that we need to keep the amount
                        // of memory we use to a manageable level even when introducing a new index to a BIG
                        // database
                        try
                        {
                            cts.Cancel();
                            // we have to run just a little bit of the query to properly setup the disposal
                            op.Execute(o => { });
                        }
                        catch (OperationCanceledException)
                        {
                        }
                        return;
                    }

                    Log.Debug("For new index {0}, using precomputed indexing batch optimization for {1} docs", index,
                              op.Header.TotalResults);
                    op.Execute(document =>
                    {
                        // the metadata is split off the document body before the document is queued
                        var metadata = document.Value<RavenJObject>(Constants.Metadata);
                        var key = metadata.Value<string>("@id");
                        var etag = Etag.Parse(metadata.Value<string>("@etag"));
                        var lastModified = DateTime.Parse(metadata.Value<string>(Constants.LastModified));
                        document.Remove(Constants.Metadata);

                        var doc = new JsonDocument
                        {
                            DataAsJson = document,
                            Etag = etag,
                            Key = key,
                            LastModified = lastModified,
                            SkipDeleteFromIndex = true,
                            Metadata = metadata
                        };

                        docsToIndex.Add(doc);

                        // remember the document with the highest etag; it describes the batch below
                        if (highestByEtag == null || doc.Etag.CompareTo(highestByEtag.Etag) > 0)
                            highestByEtag = doc;
                    });
                }

                // The header can report results while the execution delivers none; without a document
                // there is no etag to record, so leave result null and skip the batch.
                if (highestByEtag == null)
                    return;

                result = new PrecomputedIndexingBatch
                {
                    LastIndexed = highestByEtag.Etag,
                    LastModified = highestByEtag.LastModified.Value,
                    Documents = docsToIndex,
                    Index = index
                };
            });

            if (result != null && result.Documents != null && result.Documents.Count > 0)
                Database.IndexingExecuter.IndexPrecomputedBatch(result);

        }
Exemple #33
0
 /// <summary>
 /// Reports whether the given index should be skipped, which is the case for a test index and for an
 /// index whose map indexing is already in progress.
 /// </summary>
 /// <param name="index">The index being considered.</param>
 /// <returns><c>true</c> when the index is to be skipped; otherwise <c>false</c>.</returns>
 protected override bool ShouldSkipIndex(Index index)
 {
     var skip = index.IsTestIndex;
     if (skip == false)
     {
         // precomputed? slow? it is already running, nothing to do with it for now
         skip = index.IsMapIndexingInProgress;
     }

     return skip;
 }