/// <summary>
/// Reads terms straight from the index's stored term vectors (bypassing the
/// holding-state cache) and reports each non-low-precision term via the callback.
/// </summary>
/// <param name="state">Holder exposing the searcher whose reader is scanned.</param>
/// <param name="fieldsToRead">Fields whose term vectors should be visited.</param>
/// <param name="docIds">Document ids to read term vectors for.</param>
/// <param name="onTermFound">Invoked as (field, term, docId) for each hit.</param>
public static void ReadEntriesForFieldsFromTermVectors(
    IndexSearcherHolder.IndexSearcherHoldingState state,
    HashSet<string> fieldsToRead,
    HashSet<int> docIds,
    Action<string, string, int> onTermFound)
{
    var indexReader = state.IndexSearcher.IndexReader;
    foreach (var docId in docIds)
    {
        foreach (var field in fieldsToRead)
        {
            var vector = indexReader.GetTermFreqVector(docId, field);
            if (vector == null)
                continue; // no term vector stored for this doc/field pair

            foreach (var term in vector.GetTerms())
            {
                if (LowPrecisionNumber(field, term) == false)
                    onTermFound(field, term, docId);
            }
        }
    }
}
/// <summary>
/// Reports every cached term for the requested (field, docId) pairs through the
/// callback, populating the cache first for any fields that are missing from it.
/// </summary>
/// <param name="state">Holder owning both the cache and its reader/writer lock.</param>
/// <param name="fieldsToRead">Fields whose cached terms are wanted.</param>
/// <param name="docIds">Document ids to look up in the cache.</param>
/// <param name="onTermFound">Invoked as (field, termText, docId) per cached term.</param>
public static void ReadEntriesForFields(
    IndexSearcherHolder.IndexSearcherHoldingState state,
    HashSet<string> fieldsToRead,
    HashSet<int> docIds,
    Action<string, string, int> onTermFound)
{
    var indexReader = state.IndexSearcher.IndexReader;
    state.Lock.EnterReadLock();
    try
    {
        // May temporarily trade the read lock for a write lock to fill gaps.
        EnsureFieldsAreInCache(state, fieldsToRead, indexReader);

        foreach (var field in fieldsToRead)
        {
            foreach (var docId in docIds)
            {
                foreach (var cachedTerm in state.GetTermsFromCache(field, docId))
                {
                    onTermFound(cachedTerm.Field, cachedTerm.Text, docId);
                }
            }
        }
    }
    finally
    {
        // EnsureFieldsAreInCache can exit without re-taking the read lock when
        // cache population throws, hence the held check before releasing.
        if (state.Lock.IsReadLockHeld)
            state.Lock.ExitReadLock();
    }
}
// Populates the holding-state term cache for every document in the reader, one
// field at a time. Caller is expected to hold the state's write lock (see
// PreFillCache / EnsureFieldsAreInCache).
private static void FillCache(IndexSearcherHolder.IndexSearcherHoldingState state, IEnumerable<string> fieldsToRead, IndexReader reader)
{
    foreach (var field in fieldsToRead)
    {
        // One linked list of cached terms per document; array index == doc id.
        var items = new LinkedList<IndexSearcherHolder.IndexSearcherHoldingState.CacheVal>[reader.MaxDoc];
        using (var termDocs = reader.TermDocs())
        {
            // Terms(new Term(field)) positions the enumerator at the first term
            // of this field (term text defaults to empty).
            using (var termEnum = reader.Terms(new Term(field)))
            {
                do
                {
                    // Stop once the enumerator runs past this field's terms.
                    if (termEnum.Term == null || field != termEnum.Term.Field)
                    {
                        break;
                    }
                    Term term = termEnum.Term;
                    // Skip terms classified as low-precision numbers (see LowPrecisionNumber).
                    if (LowPrecisionNumber(term.Field, term.Text))
                    {
                        continue;
                    }
                    // DocFreq counts deleted documents too, so it serves only as
                    // an upper bound on the TermDocs scan below.
                    var totalDocCountIncludedDeletes = termEnum.DocFreq();
                    termDocs.Seek(termEnum.Term);
                    while (termDocs.Next() && totalDocCountIncludedDeletes > 0)
                    {
                        totalDocCountIncludedDeletes -= 1;
                        // Deleted documents still appear in TermDocs; skip them.
                        if (reader.IsDeleted(termDocs.Doc))
                        {
                            continue;
                        }
                        if (items[termDocs.Doc] == null)
                        {
                            items[termDocs.Doc] = new LinkedList<IndexSearcherHolder.IndexSearcherHoldingState.CacheVal>();
                        }
                        items[termDocs.Doc].AddLast(new IndexSearcherHolder.IndexSearcherHoldingState.CacheVal
                        {
                            Term = termEnum.Term
                        });
                    }
                } while (termEnum.Next());
            }
        }
        state.SetInCache(field, items);
    }
}
/// <summary>
/// Reads cached terms for the requested (field, docId) pairs, converting each
/// term to a double exactly once (the converted value is memoized on the cache
/// entry) and reporting it through the callback. Missing fields are loaded into
/// the cache first.
/// </summary>
/// <param name="state">Holder owning both the cache and its reader/writer lock.</param>
/// <param name="fieldsToRead">Fields whose cached terms are wanted.</param>
/// <param name="docIds">Document ids to look up in the cache.</param>
/// <param name="convert">Maps (field, termText) to the numeric value to report.</param>
/// <param name="onTermFound">Invoked as (field, termText, converted, docId).</param>
public static void ReadEntriesForFields(
    IndexSearcherHolder.IndexSearcherHoldingState state,
    HashSet<string> fieldsToRead,
    HashSet<int> docIds,
    Func<string, string, double> convert,
    Action<string, string, double, int> onTermFound)
{
    var reader = state.IndexSearcher.IndexReader;
    // NOTE: the original version also built a readFromCache dictionary tracking
    // which doc ids were served per field, but never consumed it — removed as
    // dead code (the Func<Term,double> overload is the one that uses that data).
    state.Lock.EnterReadLock();
    try
    {
        // May temporarily trade the read lock for a write lock to fill gaps.
        EnsureFieldsAreInCache(state, fieldsToRead, reader);
        foreach (var field in fieldsToRead)
        {
            foreach (var docId in docIds)
            {
                foreach (var val in state.GetFromCache(field, docId))
                {
                    double converted;
                    if (val.Val == null)
                    {
                        // First access: convert and memoize on the cache entry.
                        val.Val = converted = convert(val.Term.Field, val.Term.Text);
                    }
                    else
                    {
                        converted = val.Val.Value;
                    }
                    onTermFound(val.Term.Field, val.Term.Text, converted, docId);
                }
            }
        }
    }
    finally
    {
        // EnsureFieldsAreInCache can exit without re-taking the read lock when
        // cache population throws, hence the held check before releasing.
        if (state.Lock.IsReadLockHeld)
        {
            state.Lock.ExitReadLock();
        }
    }
}
/// <summary>
/// Eagerly loads the given fields into the holding-state term cache, taking the
/// write lock for the duration. No-op when every field is already cached.
/// </summary>
/// <param name="state">Holder owning the cache and its reader/writer lock.</param>
/// <param name="fieldsToRead">Fields to ensure are cached.</param>
/// <param name="reader">Index reader to scan when filling the cache.</param>
public static void PreFillCache(IndexSearcherHolder.IndexSearcherHoldingState state, string[] fieldsToRead, IndexReader reader)
{
    state.Lock.EnterWriteLock();
    try
    {
        var everythingCached = fieldsToRead.All(state.IsInCache);
        if (everythingCached == false)
            FillCache(state, fieldsToRead, reader);
    }
    finally
    {
        state.Lock.ExitWriteLock();
    }
}
/// <summary>
/// Guarantees every requested field is present in the holding-state term cache.
/// Caller must hold the read lock; if any field is missing, this releases the
/// read lock, fills the cache under the write lock, then re-acquires the read
/// lock. If filling throws, the read lock is NOT re-acquired — callers guard
/// their release with <c>Lock.IsReadLockHeld</c>.
/// </summary>
private static void EnsureFieldsAreInCache(IndexSearcherHolder.IndexSearcherHoldingState state, HashSet<string> fieldsToRead, IndexReader reader)
{
    // Fast path under the read lock the caller already holds.
    if (fieldsToRead.All(state.IsInCache))
    {
        return;
    }
    // Lock upgrade: ReaderWriterLockSlim cannot go read -> write directly.
    state.Lock.ExitReadLock();
    state.Lock.EnterWriteLock();
    try
    {
        // Re-check under the write lock — another thread may have cached some
        // fields while we were unlocked.
        var fieldsNotInCache = fieldsToRead.Where(field => state.IsInCache(field) == false).ToList();
        // BUGFIX: previously tested fieldsToRead.Count > 0, which is always true
        // here (the fast path above already returned otherwise), so FillCache
        // was invoked even when every field had been cached by a racing thread.
        if (fieldsNotInCache.Count > 0)
        {
            FillCache(state, fieldsNotInCache, reader);
        }
    }
    finally
    {
        state.Lock.ExitWriteLock();
    }
    // Downgrade back to the read lock for the caller (skipped on exception).
    state.Lock.EnterReadLock();
}
// Runs the full faceted-query pipeline: validates the requested facets,
// executes the base query once, then makes two accumulation passes over the
// matching readers — one for FacetMode.Default facets (per-term counts) and
// one for range facets — applying per-value aggregation where requested and
// honoring distinct queries via per-term "already seen" sets.
public void Execute()
{
    ValidateFacets();
    // DisplayName -> (term -> FacetValue) accumulator for Default-mode facets.
    var facetsByName = new Dictionary<string, Dictionary<string, FacetValue>>();

    bool isDistinct = IndexQuery.IsDistinct;
    if (isDistinct)
    {
        // CRC over the projected fields identifies the distinct "shape" of a result.
        _fieldsCrc = IndexQuery.FieldsToFetch.Aggregate<string, uint>(0, (current, field) => Crc.Value(field, current));
    }

    _currentState = Database.IndexStorage.GetCurrentStateHolder(Index);
    using (_currentState)
    {
        var currentIndexSearcher = _currentState.IndexSearcher;
        var baseQuery = Database.IndexStorage.GetDocumentQuery(Index, IndexQuery, Database.IndexQueryTriggers);
        var returnedReaders = GetQueryMatchingDocuments(currentIndexSearcher, baseQuery);

        // Pass 1: simple (FacetMode.Default) facets — count term occurrences.
        foreach (var facet in Facets.Values)
        {
            if (facet.Mode != FacetMode.Default)
                continue;

            // Per-term sets of values already counted; used only for distinct queries.
            Dictionary<string, HashSet<IndexSearcherHolder.StringCollectionValue>> distinctItems = null;
            HashSet<IndexSearcherHolder.StringCollectionValue> alreadySeen = null;
            if (isDistinct)
                distinctItems = new Dictionary<string, HashSet<IndexSearcherHolder.StringCollectionValue>>();

            foreach (var readerFacetInfo in returnedReaders)
            {
                // NOTE(review): method name carries a typo ("Documenst") — defined elsewhere, kept as-is.
                var termsForField = IndexedTerms.GetTermsAndDocumenstFor(readerFacetInfo.Reader, readerFacetInfo.DocBase, facet.Name);

                Dictionary<string, FacetValue> facetValues;
                if (facetsByName.TryGetValue(facet.DisplayName, out facetValues) == false)
                {
                    facetsByName[facet.DisplayName] = facetValues = new Dictionary<string, FacetValue>();
                }

                foreach (var kvp in termsForField)
                {
                    if (isDistinct)
                    {
                        if (distinctItems.TryGetValue(kvp.Key, out alreadySeen) == false)
                        {
                            alreadySeen = new HashSet<IndexSearcherHolder.StringCollectionValue>();
                            distinctItems[kvp.Key] = alreadySeen;
                        }
                    }

                    // Aggregations beyond None/Count need the matching doc ids retained.
                    var needToApplyAggregation = (facet.Aggregation == FacetAggregation.None || facet.Aggregation == FacetAggregation.Count) == false;
                    var intersectedDocuments = GetIntersectedDocuments(kvp.Value, readerFacetInfo.Results, alreadySeen, needToApplyAggregation);
                    var intersectCount = intersectedDocuments.Count;
                    if (intersectCount == 0)
                        continue;

                    FacetValue facetValue;
                    if (facetValues.TryGetValue(kvp.Key, out facetValue) == false)
                    {
                        facetValue = new FacetValue
                        {
                            Range = GetRangeName(facet.Name, kvp.Key)
                        };
                        facetValues.Add(kvp.Key, facetValue);
                    }
                    facetValue.Hits += intersectCount;
                    facetValue.Count = facetValue.Hits;
                    if (needToApplyAggregation)
                    {
                        ApplyAggregation(facet, facetValue, intersectedDocuments.Documents, readerFacetInfo.Reader, readerFacetInfo.DocBase);
                    }
                }
            }
        }

        // Pass 2: range facets — match each indexed term against the parsed ranges.
        foreach (var range in Ranges)
        {
            var facet = Facets[range.Key];
            var needToApplyAggregation = (facet.Aggregation == FacetAggregation.None || facet.Aggregation == FacetAggregation.Count) == false;

            Dictionary<string, HashSet<IndexSearcherHolder.StringCollectionValue>> distinctItems = null;
            HashSet<IndexSearcherHolder.StringCollectionValue> alreadySeen = null;
            if (isDistinct)
                distinctItems = new Dictionary<string, HashSet<IndexSearcherHolder.StringCollectionValue>>();

            foreach (var readerFacetInfo in returnedReaders)
            {
                var termsForField = IndexedTerms.GetTermsAndDocumenstFor(readerFacetInfo.Reader, readerFacetInfo.DocBase, facet.Name);

                if (isDistinct)
                {
                    // Keyed by range name here (one shared set per range facet).
                    if (distinctItems.TryGetValue(range.Key, out alreadySeen) == false)
                    {
                        alreadySeen = new HashSet<IndexSearcherHolder.StringCollectionValue>();
                        distinctItems[range.Key] = alreadySeen;
                    }
                }

                var facetResult = Results.Results[range.Key];
                var ranges = range.Value;
                foreach (var kvp in termsForField)
                {
                    // A term may match more than one range, so every range is tested.
                    for (int i = 0; i < ranges.Count; i++)
                    {
                        var parsedRange = ranges[i];
                        if (parsedRange.IsMatch(kvp.Key))
                        {
                            // Range facet values are pre-created; index i lines up with ranges[i].
                            var facetValue = facetResult.Values[i];
                            var intersectedDocuments = GetIntersectedDocuments(kvp.Value, readerFacetInfo.Results, alreadySeen, needToApplyAggregation);
                            var intersectCount = intersectedDocuments.Count;
                            if (intersectCount == 0)
                                continue;

                            facetValue.Hits += intersectCount;
                            facetValue.Count = facetValue.Hits;
                            if (needToApplyAggregation)
                            {
                                ApplyAggregation(facet, facetValue, intersectedDocuments.Documents, readerFacetInfo.Reader, readerFacetInfo.DocBase);
                            }
                        }
                    }
                }
            }
        }

        UpdateFacetResults(facetsByName);
        CompleteFacetCalculationsStage();
    }
}
/// <summary>
/// Reads terms for the requested (field, docId) pairs, converting each term to
/// a double and reporting it via the callback. Serves what it can from the
/// holding-state cache first (phase 1, read lock), then scans the index for
/// the remaining fields, repopulating the cache as it goes (phase 2, write lock).
/// Fields fully answered from the cache are removed from
/// <paramref name="fieldsToRead"/> (the set is mutated).
/// </summary>
/// <param name="state">Holder owning both the cache and its reader/writer lock.</param>
/// <param name="fieldsToRead">Fields to read; mutated to drop fully-cached fields.</param>
/// <param name="docIds">Document ids of interest.</param>
/// <param name="convert">Maps a term to the numeric value to report (memoized).</param>
/// <param name="onTermFound">Invoked as (term, converted, docId) for each hit.</param>
public static void ReadEntriesForFields(
    IndexSearcherHolder.IndexSearcherHoldingState state,
    HashSet<string> fieldsToRead,
    HashSet<int> docIds,
    Func<Term, double> convert,
    Action<Term, double, int> onTermFound)
{
    var reader = state.IndexSearcher.IndexReader;
    // field -> doc ids that were fully served from the cache in phase 1.
    var readFromCache = new Dictionary<string, HashSet<int>>();
    state.Lock.EnterReadLock();
    try
    {
        foreach (var field in fieldsToRead)
        {
            var read = new HashSet<int>();
            readFromCache[field] = read;
            foreach (var docId in docIds)
            {
                foreach (var val in state.GetFromCache(field, docId))
                {
                    read.Add(docId);
                    double converted;
                    if (val.Val == null)
                    {
                        // First access: convert and memoize on the cache entry.
                        val.Val = converted = convert(val.Term);
                    }
                    else
                    {
                        converted = val.Val.Value;
                    }
                    onTermFound(val.Term, converted, docId);
                }
            }
        }
    }
    finally
    {
        state.Lock.ExitReadLock();
    }
    // Drop fields that the cache answered for every requested document.
    foreach (var kvp in readFromCache)
    {
        if (kvp.Value.Count == docIds.Count)
        {
            fieldsToRead.Remove(kvp.Key); // already read all of it
        }
    }
    if (fieldsToRead.Count == 0)
    {
        return;
    }
    // Phase 2: scan the index for the remaining fields under the write lock.
    state.Lock.EnterWriteLock();
    try
    {
        using (var termDocs = reader.TermDocs())
        {
            foreach (var field in fieldsToRead)
            {
                var read = readFromCache[field];
                // Each (field, doc) cache slot is reset exactly once before refilling.
                var shouldReset = new HashSet<Tuple<string, int>>();
                using (var termEnum = reader.Terms(new Term(field)))
                {
                    do
                    {
                        // Stop once the enumerator runs past this field's terms.
                        if (termEnum.Term == null || field != termEnum.Term.Field)
                        {
                            break;
                        }
                        if (LowPrecisionNumber(termEnum.Term))
                        {
                            continue;
                        }
                        // DocFreq counts deleted docs too — upper bound only.
                        var totalDocCountIncludedDeletes = termEnum.DocFreq();
                        termDocs.Seek(termEnum.Term);
                        while (termDocs.Next() && totalDocCountIncludedDeletes > 0)
                        {
                            totalDocCountIncludedDeletes -= 1;
                            // BUGFIX: the three guards below used `break`, which
                            // aborted the whole doc scan for the current term as
                            // soon as one document was already served, deleted,
                            // or not requested — silently dropping entries for
                            // every later document of that term. Skip just the
                            // offending document instead (matches FillCache's
                            // handling of deleted docs).
                            if (read.Contains(termDocs.Doc))
                            {
                                continue;
                            }
                            if (reader.IsDeleted(termDocs.Doc))
                            {
                                continue;
                            }
                            if (docIds.Contains(termDocs.Doc) == false)
                            {
                                continue;
                            }
                            if (shouldReset.Add(Tuple.Create(field, termDocs.Doc)))
                            {
                                state.ResetInCache(field, termDocs.Doc);
                            }
                            var d = convert(termEnum.Term);
                            state.SetInCache(field, termDocs.Doc, termEnum.Term, d);
                            onTermFound(termEnum.Term, d, termDocs.Doc);
                        }
                    } while (termEnum.Next());
                }
            }
        }
    }
    finally
    {
        state.Lock.ExitWriteLock();
    }
}