/// <summary>
/// Splits the incoming per-thread/per-field map into one map for each of the
/// two wrapped consumers, then flushes <c>one</c> followed by <c>two</c>.
/// </summary>
/// <param name="threadsAndFields">Maps each per-thread consumer to the list of per-field consumers recorded for it.</param>
/// <param name="state">Segment write state, passed through unchanged to both consumers.</param>
public override void Flush(Support.Dictionary<DocFieldConsumerPerThread, IList<DocFieldConsumerPerField>> threadsAndFields, SegmentWriteState state)
{
    Support.Dictionary<DocFieldConsumerPerThread, IList<DocFieldConsumerPerField>> firstMap =
        new Support.Dictionary<DocFieldConsumerPerThread, IList<DocFieldConsumerPerField>>();
    Support.Dictionary<DocFieldConsumerPerThread, IList<DocFieldConsumerPerField>> secondMap =
        new Support.Dictionary<DocFieldConsumerPerThread, IList<DocFieldConsumerPerField>>();

    // The loop walks a copy of the incoming map rather than the map itself.
    Support.Dictionary<DocFieldConsumerPerThread, IList<DocFieldConsumerPerField>> snapshot =
        new Support.Dictionary<DocFieldConsumerPerThread, IList<DocFieldConsumerPerField>>(threadsAndFields);

    foreach (KeyValuePair<DocFieldConsumerPerThread, IList<DocFieldConsumerPerField>> pair in snapshot)
    {
        DocFieldConsumersPerThread threadPair = (DocFieldConsumersPerThread) pair.Key;

        IList<DocFieldConsumerPerField> firstFields = new List<DocFieldConsumerPerField>();
        IList<DocFieldConsumerPerField> secondFields = new List<DocFieldConsumerPerField>();

        // Every element is expected to be a DocFieldConsumersPerField; the
        // foreach performs the cast and throws if it is not.
        foreach (DocFieldConsumersPerField fieldPair in pair.Value)
        {
            firstFields.Add(fieldPair.one);
            secondFields.Add(fieldPair.two);
        }

        firstMap[threadPair.one] = firstFields;
        secondMap[threadPair.two] = secondFields;
    }

    one.Flush(firstMap, state);
    two.Flush(secondMap, state);
}
/// <summary>
/// Splits the incoming per-thread/per-field map into one map for each of the
/// two wrapped consumers, then flushes <c>one</c> followed by <c>two</c>.
/// </summary>
/// <param name="threadsAndFields">Maps each per-thread consumer to the list of per-field consumers recorded for it.</param>
/// <param name="state">Segment write state, passed through unchanged to both consumers.</param>
public override void Flush(Support.Dictionary<DocFieldConsumerPerThread, IList<DocFieldConsumerPerField>> threadsAndFields, SegmentWriteState state)
{
    Support.Dictionary<DocFieldConsumerPerThread, IList<DocFieldConsumerPerField>> firstMap =
        new Support.Dictionary<DocFieldConsumerPerThread, IList<DocFieldConsumerPerField>>();
    Support.Dictionary<DocFieldConsumerPerThread, IList<DocFieldConsumerPerField>> secondMap =
        new Support.Dictionary<DocFieldConsumerPerThread, IList<DocFieldConsumerPerField>>();

    // The loop walks a copy of the incoming map rather than the map itself.
    Support.Dictionary<DocFieldConsumerPerThread, IList<DocFieldConsumerPerField>> snapshot =
        new Support.Dictionary<DocFieldConsumerPerThread, IList<DocFieldConsumerPerField>>(threadsAndFields);

    foreach (KeyValuePair<DocFieldConsumerPerThread, IList<DocFieldConsumerPerField>> pair in snapshot)
    {
        DocFieldConsumersPerThread threadPair = (DocFieldConsumersPerThread) pair.Key;

        IList<DocFieldConsumerPerField> firstFields = new List<DocFieldConsumerPerField>();
        IList<DocFieldConsumerPerField> secondFields = new List<DocFieldConsumerPerField>();

        // Every element is expected to be a DocFieldConsumersPerField; the
        // foreach performs the cast and throws if it is not.
        foreach (DocFieldConsumersPerField fieldPair in pair.Value)
        {
            firstFields.Add(fieldPair.one);
            secondFields.Add(fieldPair.two);
        }

        firstMap[threadPair.one] = firstFields;
        secondMap[threadPair.two] = secondFields;
    }

    one.Flush(firstMap, state);
    two.Flush(secondMap, state);
}
/// <summary>
/// Re-keys the per-thread/per-field map onto the child objects held by each
/// inverter (its <c>consumer</c> and <c>endConsumer</c>), then flushes
/// <c>consumer</c> followed by <c>endConsumer</c> with the resulting maps.
/// </summary>
/// <param name="threadsAndFields">Maps each per-thread inverter to the per-field inverters recorded for it.</param>
/// <param name="state">Segment write state, passed through unchanged to both children.</param>
public override void Flush(Support.Dictionary<DocFieldConsumerPerThread, IList<DocFieldConsumerPerField>> threadsAndFields, SegmentWriteState state)
{
    Support.Dictionary<InvertedDocConsumerPerThread, IList<InvertedDocConsumerPerField>> consumerMap =
        new Support.Dictionary<InvertedDocConsumerPerThread, IList<InvertedDocConsumerPerField>>();
    Support.Dictionary<InvertedDocEndConsumerPerThread, IList<InvertedDocEndConsumerPerField>> endConsumerMap =
        new Support.Dictionary<InvertedDocEndConsumerPerThread, IList<InvertedDocEndConsumerPerField>>();

    // The loop walks a copy of the incoming map rather than the map itself.
    Support.Dictionary<DocFieldConsumerPerThread, IList<DocFieldConsumerPerField>> snapshot =
        new Support.Dictionary<DocFieldConsumerPerThread, IList<DocFieldConsumerPerField>>(threadsAndFields);

    foreach (KeyValuePair<DocFieldConsumerPerThread, IList<DocFieldConsumerPerField>> pair in snapshot)
    {
        DocInverterPerThread inverterThread = (DocInverterPerThread) pair.Key;

        List<InvertedDocConsumerPerField> consumerFields = new List<InvertedDocConsumerPerField>();
        List<InvertedDocEndConsumerPerField> endConsumerFields = new List<InvertedDocEndConsumerPerField>();

        foreach (DocFieldConsumerPerField rawField in pair.Value)
        {
            DocInverterPerField inverterField = (DocInverterPerField) rawField;
            consumerFields.Add(inverterField.consumer);
            endConsumerFields.Add(inverterField.endConsumer);
        }

        consumerMap[inverterThread.consumer] = consumerFields;
        endConsumerMap[inverterThread.endConsumer] = endConsumerFields;
    }

    consumer.Flush(consumerMap, state);
    endConsumer.Flush(endConsumerMap, state);
}
/// <summary>
/// Re-keys the per-thread/per-field map onto the child objects held by each
/// inverter (its <c>consumer</c> and <c>endConsumer</c>), then flushes
/// <c>consumer</c> followed by <c>endConsumer</c> with the resulting maps.
/// </summary>
/// <param name="threadsAndFields">Maps each per-thread inverter to the per-field inverters recorded for it.</param>
/// <param name="state">Segment write state, passed through unchanged to both children.</param>
public override void Flush(Support.Dictionary<DocFieldConsumerPerThread, IList<DocFieldConsumerPerField>> threadsAndFields, SegmentWriteState state)
{
    Support.Dictionary<InvertedDocConsumerPerThread, IList<InvertedDocConsumerPerField>> consumerMap =
        new Support.Dictionary<InvertedDocConsumerPerThread, IList<InvertedDocConsumerPerField>>();
    Support.Dictionary<InvertedDocEndConsumerPerThread, IList<InvertedDocEndConsumerPerField>> endConsumerMap =
        new Support.Dictionary<InvertedDocEndConsumerPerThread, IList<InvertedDocEndConsumerPerField>>();

    // The loop walks a copy of the incoming map rather than the map itself.
    Support.Dictionary<DocFieldConsumerPerThread, IList<DocFieldConsumerPerField>> snapshot =
        new Support.Dictionary<DocFieldConsumerPerThread, IList<DocFieldConsumerPerField>>(threadsAndFields);

    foreach (KeyValuePair<DocFieldConsumerPerThread, IList<DocFieldConsumerPerField>> pair in snapshot)
    {
        DocInverterPerThread inverterThread = (DocInverterPerThread) pair.Key;

        List<InvertedDocConsumerPerField> consumerFields = new List<InvertedDocConsumerPerField>();
        List<InvertedDocEndConsumerPerField> endConsumerFields = new List<InvertedDocEndConsumerPerField>();

        foreach (DocFieldConsumerPerField rawField in pair.Value)
        {
            DocInverterPerField inverterField = (DocInverterPerField) rawField;
            consumerFields.Add(inverterField.consumer);
            endConsumerFields.Add(inverterField.endConsumer);
        }

        consumerMap[inverterThread.consumer] = consumerFields;
        endConsumerMap[inverterThread.endConsumer] = endConsumerFields;
    }

    consumer.Flush(consumerMap, state);
    endConsumer.Flush(endConsumerMap, state);
}
/// <summary>
/// Returns the cached value for <paramref name="key"/> under the given reader,
/// creating it with <c>CreateValue</c> when no value has been stored yet.
/// </summary>
/// <remarks>
/// While holding the lock on <c>readerCache</c>, a <c>CreationPlaceholder</c> is
/// stored for a missing entry. The value itself is then created while holding a
/// lock on that placeholder only, and written back into the inner cache under
/// the <c>readerCache</c> lock again.
/// </remarks>
/// <param name="reader">Reader whose <c>GetFieldCacheKey()</c> selects the inner cache.</param>
/// <param name="key">Entry identifying the value inside the inner cache.</param>
/// <returns>The cached or newly created value.</returns>
public virtual System.Object Get(IndexReader reader, Entry key)
{
    System.Object readerKey = reader.GetFieldCacheKey();
    IDictionary<Entry, System.Object> perReader;
    System.Object cached = null;

    lock (readerCache)
    {
        perReader = readerCache[readerKey];
        if (perReader != null)
        {
            cached = perReader[key];
        }
        else
        {
            perReader = new Support.Dictionary<Entry, System.Object>();
            readerCache[readerKey] = perReader;
        }

        if (cached == null)
        {
            // Nothing stored yet: reserve the slot with a placeholder.
            cached = new CreationPlaceholder();
            perReader[key] = cached;
        }
    }

    CreationPlaceholder pending = cached as CreationPlaceholder;
    if (pending == null)
    {
        // A finished value was already in the cache.
        return cached;
    }

    lock (pending)
    {
        if (pending.value_Renamed == null)
        {
            pending.value_Renamed = CreateValue(reader, key);
            lock (readerCache)
            {
                perReader[key] = pending.value_Renamed;
            }

            // Only check if key.custom (the parser) is non-null; else, we
            // check twice for a single call to FieldCache.getXXX
            if (key.custom != null && wrapper != null)
            {
                System.IO.StreamWriter infoStream = wrapper.GetInfoStream();
                if (infoStream != null)
                {
                    PrintNewInsanity(infoStream, pending.value_Renamed);
                }
            }
        }
        return pending.value_Renamed;
    }
}
/// <summary>
/// Callback mechanism used by the TermVectorReader. Resets the field-to-terms
/// map and starts a fresh position map for <paramref name="field"/>.
/// </summary>
/// <param name="field">The field being read.</param>
/// <param name="numTerms">The number of terms in the vector; used as the constructor argument of the new field-to-terms map.</param>
/// <param name="storeOffsets">Whether offsets are available; only recorded, not otherwise acted on here.</param>
/// <param name="storePositions">Whether positions are available; must be true.</param>
/// <exception cref="System.SystemException">Thrown when <paramref name="storePositions"/> is false.</exception>
public override void SetExpectations(System.String field, int numTerms, bool storeOffsets, bool storePositions)
{
    if (!storePositions)
    {
        throw new System.SystemException("You must store positions in order to use this Mapper");
    }

    // Offsets need no special handling: the flag is just remembered below.
    this.storeOffsets = storeOffsets;
    this.currentField = field;
    this.fieldToTerms = new Support.Dictionary<string, Support.Dictionary<int, TVPositionInfo>>(numTerms);
    this.currentPositions = new Support.Dictionary<int, TVPositionInfo>();
    this.fieldToTerms[this.currentField] = this.currentPositions;
}
/// <summary>
/// Builds the table that maps each supported value type to its cache
/// implementation and assigns it to <c>caches</c>, all while holding the lock
/// on this instance.
/// </summary>
private void Init()
{
    lock (this)
    {
        // Fill a local table first and assign the field once it is complete.
        Support.Dictionary<Type, Cache> table = new Support.Dictionary<Type, Cache>();

        table[typeof(sbyte)] = new ByteCache(this);
        table[typeof(short)] = new ShortCache(this);
        table[typeof(int)] = new IntCache(this);
        table[typeof(float)] = new FloatCache(this);
        table[typeof(long)] = new LongCache(this);
        table[typeof(double)] = new DoubleCache(this);
        table[typeof(string)] = new StringCache(this);
        table[typeof(StringIndex)] = new StringIndexCache(this);
        table[typeof(System.IComparable)] = new CustomCache(this);
        table[typeof(System.Object)] = new AutoCache(this);

        caches = table;
    }
}
/// <summary>
/// Builds the table that maps each supported value type to its cache
/// implementation and assigns it to <c>caches</c>, all while holding the lock
/// on this instance.
/// </summary>
private void Init()
{
    lock (this)
    {
        // Fill a local table first and assign the field once it is complete.
        Support.Dictionary<Type, Cache> table = new Support.Dictionary<Type, Cache>();

        table[typeof(sbyte)] = new ByteCache(this);
        table[typeof(short)] = new ShortCache(this);
        table[typeof(int)] = new IntCache(this);
        table[typeof(float)] = new FloatCache(this);
        table[typeof(long)] = new LongCache(this);
        table[typeof(double)] = new DoubleCache(this);
        table[typeof(string)] = new StringCache(this);
        table[typeof(StringIndex)] = new StringIndexCache(this);
        table[typeof(System.IComparable)] = new CustomCache(this);
        table[typeof(System.Object)] = new AutoCache(this);

        caches = table;
    }
}
/// <summary>
/// Collects each thread's fields, flushes the stored-fields writer and the
/// field consumer, and finally writes the field infos file and records its
/// name in <c>state.flushedFiles</c>.
/// </summary>
/// <param name="threads">Per-thread processors; only the keys are enumerated.</param>
/// <param name="state">Segment write state that supplies the directory and file name and receives the flushed file name.</param>
public override void Flush(IDictionary<DocConsumerPerThread, DocConsumerPerThread> threads, SegmentWriteState state)
{
    Support.Dictionary<DocFieldConsumerPerThread, IList<DocFieldConsumerPerField>> fieldsByThread =
        new Support.Dictionary<DocFieldConsumerPerThread, IList<DocFieldConsumerPerField>>();

    foreach (DocFieldProcessorPerThread processorThread in threads.Keys)
    {
        fieldsByThread[processorThread.consumer] = processorThread.Fields();
        processorThread.TrimFields(state);
    }

    fieldsWriter.Flush(state);
    consumer.Flush(fieldsByThread, state);

    // The field infos are written only after the consumer has flushed, so the
    // consumer can still alter the FieldInfo* if necessary. EG,
    // FreqProxTermsWriter does this with FieldInfo.storePayload.
    System.String infosFileName = state.SegmentFileName(IndexFileNames.FIELD_INFOS_EXTENSION);
    fieldInfos.Write(state.directory, infosFileName);
    state.flushedFiles.Add(infosFileName);
}
/// <summary> Create weight in multiple index scenario.
///
/// Distributed query processing is done in the following steps:
/// 1. rewrite query
/// 2. extract necessary terms
/// 3. collect dfs for these terms from the Searchables
/// 4. create query weight using aggregate dfs.
/// 5. distribute that weight to Searchables
/// 6. merge results
///
/// Steps 1-4 are done here, 5+6 in the search() methods
/// </summary>
/// <param name="original">The query to rewrite and weight.</param>
/// <returns>The weight the rewritten query produces from the aggregated document frequencies.</returns>
public /*protected internal*/ override Weight CreateWeight(Query original)
{
    // Step 1: rewrite.
    Query rewritten = Rewrite(original);

    // Step 2: extract the terms of the rewritten query.
    Support.Set<Lucene.Net.Index.Term> termSet = new Support.Set<Term>();
    rewritten.ExtractTerms(termSet);

    // Step 3: fix an order for the terms, then add up the document
    // frequencies reported by every searchable.
    Term[] termArray = new Term[termSet.Count];
    int next = 0;
    foreach (Term term in termSet)
    {
        termArray[next] = term;
        next++;
    }

    int[] totals = new int[termSet.Count];
    for (int s = 0; s < searchables.Length; s++)
    {
        int[] partial = searchables[s].DocFreqs(termArray);
        for (int t = 0; t < totals.Length; t++)
        {
            totals[t] += partial[t];
        }
    }

    IDictionary<Term, int> dfByTerm = new Support.Dictionary<Term, int>();
    for (int t = 0; t < termArray.Length; t++)
    {
        dfByTerm[termArray[t]] = totals[t];
    }

    // Step 4: weight the rewritten query against the aggregated statistics.
    int numDocs = MaxDoc();
    CachedDfSource aggregateSource = new CachedDfSource(dfByTerm, numDocs, GetSimilarity());
    return rewritten.Weight(aggregateSource);
}
/// <summary>
/// Flushes the term vector outputs if they have been opened, then resets every
/// per-field and per-thread terms hash. Runs while holding the lock on this
/// instance.
/// </summary>
/// <param name="threadsAndFields">Maps each per-thread writer to the per-field writers recorded for it.</param>
/// <param name="state">Segment write state; <c>numDocsInStore</c> decides whether trailing documents are filled in.</param>
public override void Flush(Support.Dictionary<TermsHashConsumerPerThread, IList<TermsHashConsumerPerField>> threadsAndFields, SegmentWriteState state)
{
    lock (this)
    {
        // NOTE: it's possible that all documents seen in this segment
        // hit non-aborting exceptions, in which case we will
        // not have yet init'd the TermVectorsWriter.  This is
        // actually OK (unlike in the stored fields case)
        // because, although FieldInfos.hasVectors() will return
        // true, the TermVectorsReader gracefully handles
        // non-existence of the term vectors files.
        if (tvx != null)
        {
            if (state.numDocsInStore > 0)
            {
                // In case there are some final documents that we
                // didn't see (because they hit a non-aborting exception):
                Fill(state.numDocsInStore - docWriter.GetDocStoreOffset());
            }

            tvx.Flush();
            tvd.Flush();
            tvf.Flush();
        }

        foreach (KeyValuePair<TermsHashConsumerPerThread, IList<TermsHashConsumerPerField>> pair in threadsAndFields)
        {
            foreach (TermsHashConsumerPerField rawField in pair.Value)
            {
                TermVectorsTermsWriterPerField vectorField = (TermVectorsTermsWriterPerField) rawField;
                vectorField.termsHashPerField.Reset();
                vectorField.ShrinkHash();
            }

            TermVectorsTermsWriterPerThread vectorThread = (TermVectorsTermsWriterPerThread) pair.Key;
            vectorThread.termsHashPerThread.Reset(true);
        }
    }
}
/// <summary>
/// Sorts <paramref name="queryTerms"/> in place, then stores the distinct terms
/// in <c>terms</c> and, at the same index, how often each occurred in
/// <c>termFreqs</c>. Does nothing when <paramref name="queryTerms"/> is null.
/// </summary>
/// <param name="queryTerms">Terms to process; may be null. The array itself is sorted by this call.</param>
private void ProcessTerms(System.String[] queryTerms)
{
    if (queryTerms == null)
    {
        return;
    }

    System.Array.Sort<string>(queryTerms);

    // Maps a term to its slot in uniqueTerms/counts. The lookup below relies
    // on the map yielding null for a term that has not been stored yet.
    Support.Dictionary<string, int?> slotByTerm = new Support.Dictionary<string, int?>(queryTerms.Length);
    List<System.String> uniqueTerms = new List<System.String>(queryTerms.Length);
    List<int> counts = new List<int>(queryTerms.Length);

    foreach (System.String term in queryTerms)
    {
        // One lookup per term; HasValue avoids boxing the nullable just to
        // compare it with null.
        int? slot = slotByTerm[term];
        if (slot.HasValue)
        {
            counts[slot.Value]++;
        }
        else
        {
            // The next free slot is always the current number of distinct terms.
            slotByTerm[term] = uniqueTerms.Count;
            uniqueTerms.Add(term);
            counts.Add(1);
        }
    }

    terms = uniqueTerms.ToArray();
    termFreqs = counts.ToArray();
}
/// <summary>
/// Callback mechanism used by the TermVectorReader. Resets the field-to-terms
/// map and starts a fresh position map for <paramref name="field"/>.
/// </summary>
/// <param name="field">The field being read.</param>
/// <param name="numTerms">The number of terms in the vector; used as the constructor argument of the new field-to-terms map.</param>
/// <param name="storeOffsets">Whether offsets are available; only recorded, not otherwise acted on here.</param>
/// <param name="storePositions">Whether positions are available; must be true.</param>
/// <exception cref="System.SystemException">Thrown when <paramref name="storePositions"/> is false.</exception>
public override void SetExpectations(System.String field, int numTerms, bool storeOffsets, bool storePositions)
{
    if (!storePositions)
    {
        throw new System.SystemException("You must store positions in order to use this Mapper");
    }

    // Offsets need no special handling: the flag is just remembered below.
    this.storeOffsets = storeOffsets;
    this.currentField = field;
    this.fieldToTerms = new Support.Dictionary<string, Support.Dictionary<int, TVPositionInfo>>(numTerms);
    this.currentPositions = new Support.Dictionary<int, TVPositionInfo>();
    this.fieldToTerms[this.currentField] = this.currentPositions;
}
/// <summary>
/// Closes the store to future operations: clears the <c>isOpen</c> flag and
/// then drops the reference to the file map so its memory can be released.
/// </summary>
public override void Close()
{
    this.isOpen = false;
    this.fileMap = null;
}
/// <summary>
/// Re-keys the per-thread/per-field map for this hash's consumer and, when a
/// chained <c>nextTermsHash</c> exists, for that hash as well. It then flushes
/// the consumer, shrinks the free postings and flushes the chained hash. Runs
/// while holding the lock on this instance.
/// </summary>
/// <param name="threadsAndFields">Maps each per-thread hash to the per-field hashes recorded for it.</param>
/// <param name="state">Segment write state, passed through unchanged.</param>
internal override void Flush(Support.Dictionary<InvertedDocConsumerPerThread, IList<InvertedDocConsumerPerField>> threadsAndFields, SegmentWriteState state)
{
    lock (this)
    {
        Support.Dictionary<TermsHashConsumerPerThread, IList<TermsHashConsumerPerField>> consumerMap =
            new Support.Dictionary<TermsHashConsumerPerThread, IList<TermsHashConsumerPerField>>();

        // Stays null when there is no chained hash to feed.
        Support.Dictionary<InvertedDocConsumerPerThread, IList<InvertedDocConsumerPerField>> chainedMap = null;
        if (nextTermsHash != null)
        {
            chainedMap = new Support.Dictionary<InvertedDocConsumerPerThread, IList<InvertedDocConsumerPerField>>();
        }

        foreach (KeyValuePair<InvertedDocConsumerPerThread, IList<InvertedDocConsumerPerField>> pair in threadsAndFields)
        {
            TermsHashPerThread hashThread = (TermsHashPerThread) pair.Key;

            List<TermsHashConsumerPerField> consumerFields = new List<TermsHashConsumerPerField>();
            List<InvertedDocConsumerPerField> chainedFields = null;
            if (nextTermsHash != null)
            {
                chainedFields = new List<InvertedDocConsumerPerField>();
            }

            foreach (InvertedDocConsumerPerField rawField in pair.Value)
            {
                TermsHashPerField hashField = (TermsHashPerField) rawField;
                consumerFields.Add(hashField.consumer);
                if (nextTermsHash != null)
                {
                    chainedFields.Add(hashField.nextPerField);
                }
            }

            consumerMap[hashThread.consumer] = consumerFields;
            if (nextTermsHash != null)
            {
                chainedMap[hashThread.nextPerThread] = chainedFields;
            }
        }

        consumer.Flush(consumerMap, state);
        ShrinkFreePostings(threadsAndFields, state);

        if (nextTermsHash != null)
        {
            nextTermsHash.Flush(chainedMap, state);
        }
    }
}
/// <summary>
/// Collects each thread's fields, flushes the stored-fields writer and the
/// field consumer, and finally writes the field infos file and records its
/// name in <c>state.flushedFiles</c>.
/// </summary>
/// <param name="threads">Per-thread processors; only the keys are enumerated.</param>
/// <param name="state">Segment write state that supplies the directory and file name and receives the flushed file name.</param>
public override void Flush(IDictionary<DocConsumerPerThread, DocConsumerPerThread> threads, SegmentWriteState state)
{
    Support.Dictionary<DocFieldConsumerPerThread, IList<DocFieldConsumerPerField>> fieldsByThread =
        new Support.Dictionary<DocFieldConsumerPerThread, IList<DocFieldConsumerPerField>>();

    foreach (DocFieldProcessorPerThread processorThread in threads.Keys)
    {
        fieldsByThread[processorThread.consumer] = processorThread.Fields();
        processorThread.TrimFields(state);
    }

    fieldsWriter.Flush(state);
    consumer.Flush(fieldsByThread, state);

    // The field infos are written only after the consumer has flushed, so the
    // consumer can still alter the FieldInfo* if necessary. EG,
    // FreqProxTermsWriter does this with FieldInfo.storePayload.
    System.String infosFileName = state.SegmentFileName(IndexFileNames.FIELD_INFOS_EXTENSION);
    fieldInfos.Write(state.directory, infosFileName);
    state.flushedFiles.Add(infosFileName);
}
/// <summary>
/// Called when DocumentsWriter decides to create a new segment.
/// </summary>
/// <param name="threadsAndFields">Maps each per-thread consumer to the list of per-field consumers associated with it.</param>
/// <param name="state">Write state of the segment being created.</param>
public abstract void Flush(
    Support.Dictionary<DocFieldConsumerPerThread, IList<DocFieldConsumerPerField>> threadsAndFields,
    SegmentWriteState state);
/// <summary>
/// Creates a cache on top of the supplied dictionary. The dictionary is kept
/// by reference; no copy is made.
/// </summary>
/// <param name="map">Dictionary stored in the <c>map</c> field.</param>
public SimpleMapCache(Support.Dictionary<K, V> map)
{
    this.map = map;
}
/// <summary>
/// Remaps all buffered deletes based on a completed merge. Every buffered
/// document number (by term, by docID and by query) is passed through
/// <paramref name="mapper"/>. Runs while holding the lock on this instance.
/// </summary>
/// <remarks>
/// The three replacement collections are built first and assigned afterwards,
/// so no field is touched until all remapping has succeeded. A collection that
/// is currently empty is left as it is.
/// </remarks>
/// <param name="mapper">Translates a pre-merge document number into its post-merge value.</param>
/// <param name="infos">Not used by this method.</param>
/// <param name="docMaps">Not used by this method.</param>
/// <param name="delCounts">Not used by this method.</param>
/// <param name="merge">Not used by this method.</param>
/// <param name="mergeDocCount">Not used by this method.</param>
internal virtual void Remap(MergeDocIDRemapper mapper, SegmentInfos infos, int[][] docMaps, int[] delCounts, MergePolicy.OneMerge merge, int mergeDocCount)
{
    lock (this)
    {
        // Delete-by-term
        System.Collections.Generic.IDictionary<Term, Num> remappedTerms = null;
        if (terms.Count > 0)
        {
            remappedTerms = doTermSort
                ? new Support.Dictionary<Term, Num>(true)
                : new Support.Dictionary<Term, Num>();

            foreach (KeyValuePair<Term, Num> termEntry in terms)
            {
                int oldLimit = termEntry.Value.GetNum();
                remappedTerms[termEntry.Key] = new Num(mapper.Remap(oldLimit));
            }
        }

        // Delete-by-docID
        List<int> remappedDocIDs = null;
        if (docIDs.Count > 0)
        {
            remappedDocIDs = new List<int>(docIDs.Count);
            foreach (int oldDocID in docIDs)
            {
                remappedDocIDs.Add(mapper.Remap(oldDocID));
            }
        }

        // Delete-by-query
        Support.Dictionary<Query, int> remappedQueries = null;
        if (queries.Count > 0)
        {
            remappedQueries = new Support.Dictionary<Query, int>(queries.Count);
            foreach (KeyValuePair<Query, int> queryEntry in queries)
            {
                remappedQueries[queryEntry.Key] = mapper.Remap(queryEntry.Value);
            }
        }

        // Install whatever was rebuilt.
        if (remappedTerms != null)
        {
            terms = remappedTerms;
        }
        if (remappedDocIDs != null)
        {
            docIDs = remappedDocIDs;
        }
        if (remappedQueries != null)
        {
            queries = remappedQueries;
        }
    }
}
/// <summary>
/// Sets the date resolution used by RangeQueries for a specific field.
/// </summary>
/// <param name="fieldName">Field for which the date resolution is to be set; must not be null.</param>
/// <param name="dateResolution">Date resolution to set.</param>
/// <exception cref="System.ArgumentException">Thrown when <paramref name="fieldName"/> is null.</exception>
public virtual void SetDateResolution(System.String fieldName, DateTools.Resolution dateResolution)
{
    if (fieldName == null)
    {
        throw new System.ArgumentException("Field cannot be null.");
    }

    // The per-field map is only created once the first resolution is set.
    if (this.fieldToDateResolution == null)
    {
        this.fieldToDateResolution = new Support.Dictionary<string, DateTools.Resolution>();
    }

    this.fieldToDateResolution[fieldName] = dateResolution;
}
/// <summary>
/// Produce _X.nrm if any document had a field with norms not disabled.
/// </summary>
/// <remarks>
/// First groups the per-thread field writers by their <c>FieldInfo</c> and
/// removes from each thread's list the writers that buffered nothing. Then,
/// for every field number, merges the buffered norms of all threads in
/// ascending docID order into the norms file, writing <c>defaultNorm</c> for
/// documents without a buffered norm. The output is closed even when writing fails.
/// </remarks>
/// <param name="threadsAndFields">Maps each per-thread consumer to its per-field consumers; the lists are modified in place.</param>
/// <param name="state">Supplies segment name, directory and document count, and receives the norms file name.</param>
public override void Flush(Support.Dictionary<InvertedDocEndConsumerPerThread, IList<InvertedDocEndConsumerPerField>> threadsAndFields, SegmentWriteState state)
{
    Support.Dictionary<FieldInfo, IList<NormsWriterPerField>> byField =
        new Support.Dictionary<FieldInfo, IList<NormsWriterPerField>>();

    // Typically, each thread will have encountered the same
    // field.  So first we collate by field, ie, all
    // per-thread field instances that correspond to the
    // same FieldInfo
    foreach (KeyValuePair<InvertedDocEndConsumerPerThread, IList<InvertedDocEndConsumerPerField>> pair in threadsAndFields)
    {
        IList<InvertedDocEndConsumerPerField> threadFields = pair.Value;
        List<NormsWriterPerField> unused = new List<NormsWriterPerField>();

        foreach (InvertedDocEndConsumerPerField rawField in threadFields)
        {
            NormsWriterPerField normsField = (NormsWriterPerField) rawField;
            if (normsField.upto <= 0)
            {
                // Not seen since the previous flush: remove it once the
                // enumeration of this list is over.
                unused.Add(normsField);
                continue;
            }

            // It has some norms. The map yields null for a FieldInfo that has
            // no list yet.
            IList<NormsWriterPerField> sameField = byField[normsField.fieldInfo];
            if (sameField == null)
            {
                sameField = new List<NormsWriterPerField>();
                byField[normsField.fieldInfo] = sameField;
            }
            sameField.Add(normsField);
        }

        foreach (NormsWriterPerField dropped in unused)
        {
            threadFields.Remove(dropped);
        }
    }

    System.String normsFileName = state.segmentName + "." + IndexFileNames.NORMS_EXTENSION;
    state.flushedFiles.Add(normsFileName);
    IndexOutput normsOut = state.directory.CreateOutput(normsFileName);

    try
    {
        normsOut.WriteBytes(SegmentMerger.NORMS_HEADER, 0, SegmentMerger.NORMS_HEADER.Length);

        int numField = fieldInfos.Size();
        int normCount = 0;

        for (int fieldNumber = 0; fieldNumber < numField; fieldNumber++)
        {
            FieldInfo fieldInfo = fieldInfos.FieldInfo(fieldNumber);
            IList<NormsWriterPerField> toMerge = byField[fieldInfo];
            int upto = 0;

            if (toMerge != null)
            {
                int numFields = toMerge.Count;
                normCount++;

                NormsWriterPerField[] active = new NormsWriterPerField[numFields];
                int[] cursors = new int[numFields];
                toMerge.CopyTo(active, 0);

                int numLeft = numFields;
                while (numLeft > 0)
                {
                    System.Diagnostics.Debug.Assert(cursors[0] < active[0].docIDs.Length, " uptos[0]=" + cursors[0] + " len=" + (active[0].docIDs.Length));

                    // Find the writer whose next buffered docID is smallest.
                    int minLoc = 0;
                    int minDocID = active[0].docIDs[cursors[0]];
                    for (int j = 1; j < numLeft; j++)
                    {
                        int docID = active[j].docIDs[cursors[j]];
                        if (docID < minDocID)
                        {
                            minDocID = docID;
                            minLoc = j;
                        }
                    }

                    System.Diagnostics.Debug.Assert(minDocID < state.numDocs);

                    // Fill hole
                    while (upto < minDocID)
                    {
                        normsOut.WriteByte(defaultNorm);
                        upto++;
                    }

                    normsOut.WriteByte(active[minLoc].norms[cursors[minLoc]]);
                    cursors[minLoc]++;
                    upto++;

                    if (cursors[minLoc] == active[minLoc].upto)
                    {
                        // This writer is exhausted: reset it and move the last
                        // live writer into its slot.
                        active[minLoc].Reset();
                        int lastLive = numLeft - 1;
                        if (minLoc != lastLive)
                        {
                            active[minLoc] = active[lastLive];
                            cursors[minLoc] = cursors[lastLive];
                        }
                        numLeft--;
                    }
                }

                // Fill final hole with defaultNorm
                while (upto < state.numDocs)
                {
                    normsOut.WriteByte(defaultNorm);
                    upto++;
                }
            }
            else if (fieldInfo.isIndexed && !fieldInfo.omitNorms)
            {
                normCount++;

                // Fill entire field with default norm:
                while (upto < state.numDocs)
                {
                    normsOut.WriteByte(defaultNorm);
                    upto++;
                }
            }

            System.Diagnostics.Debug.Assert(4 + normCount * state.numDocs == normsOut.GetFilePointer(), ".nrm file size mismatch: expected=" + (4 + normCount * state.numDocs) + " actual=" + normsOut.GetFilePointer());
        }
    }
    finally
    {
        normsOut.Close();
    }
}
/// <summary> Create weight in multiple index scenario.
///
/// Distributed query processing is done in the following steps:
/// 1. rewrite query
/// 2. extract necessary terms
/// 3. collect dfs for these terms from the Searchables
/// 4. create query weight using aggregate dfs.
/// 5. distribute that weight to Searchables
/// 6. merge results
///
/// Steps 1-4 are done here, 5+6 in the search() methods
/// </summary>
/// <param name="original">The query to rewrite and weight.</param>
/// <returns>The weight the rewritten query produces from the aggregated document frequencies.</returns>
public /*protected internal*/ override Weight CreateWeight(Query original)
{
    // Step 1: rewrite.
    Query rewritten = Rewrite(original);

    // Step 2: extract the terms of the rewritten query.
    Support.Set<Lucene.Net.Index.Term> termSet = new Support.Set<Term>();
    rewritten.ExtractTerms(termSet);

    // Step 3: fix an order for the terms, then add up the document
    // frequencies reported by every searchable.
    Term[] termArray = new Term[termSet.Count];
    int next = 0;
    foreach (Term term in termSet)
    {
        termArray[next] = term;
        next++;
    }

    int[] totals = new int[termSet.Count];
    for (int s = 0; s < searchables.Length; s++)
    {
        int[] partial = searchables[s].DocFreqs(termArray);
        for (int t = 0; t < totals.Length; t++)
        {
            totals[t] += partial[t];
        }
    }

    IDictionary<Term, int> dfByTerm = new Support.Dictionary<Term, int>();
    for (int t = 0; t < termArray.Length; t++)
    {
        dfByTerm[termArray[t]] = totals[t];
    }

    // Step 4: weight the rewritten query against the aggregated statistics.
    int numDocs = MaxDoc();
    CachedDfSource aggregateSource = new CachedDfSource(dfByTerm, numDocs, GetSimilarity());
    return rewritten.Weight(aggregateSource);
}
/// <summary>Flush all pending docs to a new segment.</summary>
/// <remarks>
/// Runs while holding the lock on this instance. If anything between
/// initializing the flush state and <c>DoAfterFlush</c> throws, <c>Abort</c> is
/// called before the exception propagates.
/// </remarks>
/// <param name="closeDocStore">When true, the doc store is closed first and <c>flushState.numDocsInStore</c> is reset to 0.</param>
/// <returns>The value of <c>flushState.numDocs</c> after the flush.</returns>
internal int Flush(bool closeDocStore)
{
    lock (this)
    {
        System.Diagnostics.Debug.Assert(AllThreadsIdle());
        System.Diagnostics.Debug.Assert(numDocsInRAM > 0);
        System.Diagnostics.Debug.Assert(nextDocID == numDocsInRAM);
        System.Diagnostics.Debug.Assert(waitQueue.numWaiting == 0);
        System.Diagnostics.Debug.Assert(waitQueue.waitingBytes == 0);

        InitFlushState(false);

        docStoreOffset = numDocsInStore;

        if (infoStream != null)
        {
            Message("flush postings as segment " + flushState.segmentName + " numDocs=" + numDocsInRAM);
        }

        bool success = false;

        try
        {
            if (closeDocStore)
            {
                System.Diagnostics.Debug.Assert(flushState.docStoreSegmentName != null);
                System.Diagnostics.Debug.Assert(flushState.docStoreSegmentName.Equals(flushState.segmentName));
                CloseDocStore();
                flushState.numDocsInStore = 0;
            }

            IDictionary<DocConsumerPerThread, DocConsumerPerThread> threads = new Support.Dictionary<DocConsumerPerThread, DocConsumerPerThread>();
            for (int i = 0; i < threadStates.Length; i++)
            {
                threads[threadStates[i].consumer] = threadStates[i].consumer;
            }
            consumer.Flush(threads, flushState);

            if (infoStream != null)
            {
                SegmentInfo si = new SegmentInfo(flushState.segmentName, flushState.numDocs, directory);
                long newSegmentSize = si.SizeInBytes();

                // new/old is already scaled to a percentage (100.0 * new / old), so
                // it is formatted as a plain fixed-point number followed by a
                // literal '%'. The custom "%" format specifier must not be used
                // here: it multiplies the value by 100 once more.
                System.String message = System.String.Format(
                    nf,
                    " oldRAMSize={0:d} newFlushedSize={1:d} docs/MB={2:f} new/old={3:f}%",
                    new System.Object[]
                    {
                        numBytesUsed,
                        newSegmentSize,
                        (numDocsInRAM / (newSegmentSize / 1024.0 / 1024.0)),
                        (100.0 * newSegmentSize / numBytesUsed)
                    });
                Message(message);
            }

            flushedDocCount += flushState.numDocs;

            DoAfterFlush();

            success = true;
        }
        finally
        {
            if (!success)
            {
                Abort();
            }
        }

        System.Diagnostics.Debug.Assert(waitQueue.waitingBytes == 0);

        return flushState.numDocs;
    }
}
// TODO: would be nice to factor out more of this, eg the
// FreqProxFieldMergeState, and code to visit all Fields
// under the same FieldInfo together, up into TermsHash*.
// Other writers would presumably share alot of this...

/// <summary>
/// Gathers every per-field writer that has postings, sorts them, and appends
/// the postings of each run of writers sharing a field name to a new
/// <c>FormatPostingsFieldsWriter</c>. Each processed field and every
/// per-thread hash is reset afterwards.
/// </summary>
/// <param name="threadsAndFields">Maps each per-thread writer to the per-field writers recorded for it.</param>
/// <param name="state">Segment write state handed to the postings writer.</param>
public override void Flush(Support.Dictionary<TermsHashConsumerPerThread, IList<TermsHashConsumerPerField>> threadsAndFields, SegmentWriteState state)
{
    // Gather all FieldData's that have postings, across all ThreadStates
    List<FreqProxTermsWriterPerField> withPostings = new List<FreqProxTermsWriterPerField>();
    foreach (KeyValuePair<TermsHashConsumerPerThread, IList<TermsHashConsumerPerField>> pair in threadsAndFields)
    {
        foreach (TermsHashConsumerPerField rawField in pair.Value)
        {
            FreqProxTermsWriterPerField candidate = (FreqProxTermsWriterPerField) rawField;
            if (candidate.termsHashPerField.numPostings > 0)
            {
                withPostings.Add(candidate);
            }
        }
    }

    // Sort by field name
    withPostings.Sort();
    int total = withPostings.Count;

    // TODO: allow Lucene user to customize this consumer:
    FormatPostingsFieldsConsumer postingsConsumer = new FormatPostingsFieldsWriter(state, fieldInfos);

    /*
     * Current writer chain:
     *   FormatPostingsFieldsConsumer
     *     -> IMPL: FormatPostingsFieldsWriter
     *       -> FormatPostingsTermsConsumer
     *         -> IMPL: FormatPostingsTermsWriter
     *           -> FormatPostingsDocConsumer
     *             -> IMPL: FormatPostingsDocWriter
     *               -> FormatPostingsPositionsConsumer
     *                 -> IMPL: FormatPostingsPositionsWriter
     */

    int runStart = 0;
    while (runStart < total)
    {
        FieldInfo runInfo = withPostings[runStart].fieldInfo;
        System.String runName = runInfo.name;

        // Extend the run over all following writers with the same field name.
        int runEnd = runStart + 1;
        while (runEnd < total && withPostings[runEnd].fieldInfo.name.Equals(runName))
        {
            runEnd++;
        }

        FreqProxTermsWriterPerField[] sameField = new FreqProxTermsWriterPerField[runEnd - runStart];
        for (int src = runStart; src < runEnd; src++)
        {
            FreqProxTermsWriterPerField member = withPostings[src];
            sameField[src - runStart] = member;

            // Aggregate the storePayload as seen by the same
            // field across multiple threads
            runInfo.storePayloads |= member.hasPayloads;
        }

        // If this field has postings then add them to the segment
        AppendPostings(sameField, postingsConsumer);

        foreach (FreqProxTermsWriterPerField flushed in sameField)
        {
            TermsHashPerField hashField = flushed.termsHashPerField;
            int postingCount = hashField.numPostings;
            hashField.Reset();
            hashField.ShrinkHash(postingCount);
            flushed.Reset();
        }

        runStart = runEnd;
    }

    foreach (KeyValuePair<TermsHashConsumerPerThread, IList<TermsHashConsumerPerField>> pair in threadsAndFields)
    {
        FreqProxTermsWriterPerThread writerThread = (FreqProxTermsWriterPerThread) pair.Key;
        writerThread.termsHashPerThread.Reset(true);
    }

    postingsConsumer.Finish();
}
/// <summary>
/// Re-keys the per-thread/per-field map for this hash's consumer and, when a
/// chained <c>nextTermsHash</c> exists, for that hash as well. It then flushes
/// the consumer, shrinks the free postings and flushes the chained hash. Runs
/// while holding the lock on this instance.
/// </summary>
/// <param name="threadsAndFields">Maps each per-thread hash to the per-field hashes recorded for it.</param>
/// <param name="state">Segment write state, passed through unchanged.</param>
internal override void Flush(Support.Dictionary<InvertedDocConsumerPerThread, IList<InvertedDocConsumerPerField>> threadsAndFields, SegmentWriteState state)
{
    lock (this)
    {
        Support.Dictionary<TermsHashConsumerPerThread, IList<TermsHashConsumerPerField>> consumerMap =
            new Support.Dictionary<TermsHashConsumerPerThread, IList<TermsHashConsumerPerField>>();

        // Stays null when there is no chained hash to feed.
        Support.Dictionary<InvertedDocConsumerPerThread, IList<InvertedDocConsumerPerField>> chainedMap = null;
        if (nextTermsHash != null)
        {
            chainedMap = new Support.Dictionary<InvertedDocConsumerPerThread, IList<InvertedDocConsumerPerField>>();
        }

        foreach (KeyValuePair<InvertedDocConsumerPerThread, IList<InvertedDocConsumerPerField>> pair in threadsAndFields)
        {
            TermsHashPerThread hashThread = (TermsHashPerThread) pair.Key;

            List<TermsHashConsumerPerField> consumerFields = new List<TermsHashConsumerPerField>();
            List<InvertedDocConsumerPerField> chainedFields = null;
            if (nextTermsHash != null)
            {
                chainedFields = new List<InvertedDocConsumerPerField>();
            }

            foreach (InvertedDocConsumerPerField rawField in pair.Value)
            {
                TermsHashPerField hashField = (TermsHashPerField) rawField;
                consumerFields.Add(hashField.consumer);
                if (nextTermsHash != null)
                {
                    chainedFields.Add(hashField.nextPerField);
                }
            }

            consumerMap[hashThread.consumer] = consumerFields;
            if (nextTermsHash != null)
            {
                chainedMap[hashThread.nextPerThread] = chainedFields;
            }
        }

        consumer.Flush(consumerMap, state);
        ShrinkFreePostings(threadsAndFields, state);

        if (nextTermsHash != null)
        {
            nextTermsHash.Flush(chainedMap, state);
        }
    }
}
/// <summary>
/// Produce _X.nrm if any document had a field with norms not disabled.
/// </summary>
/// <remarks>
/// Per-thread field instances are first collated by their <c>FieldInfo</c>;
/// instances whose <c>upto</c> is 0 are removed from the owning thread's list.
/// Then, for every field number in <c>fieldInfos</c>, the buffered norms of all
/// collated instances are merged in ascending docID order and written after the
/// norms header, with <c>defaultNorm</c> written for every document that has no
/// buffered norm. The output is always closed, even when writing throws.
/// </remarks>
/// <param name="threadsAndFields">
/// Per-thread state mapped to that thread's per-field instances; every list
/// element is cast to <c>NormsWriterPerField</c>.
/// </param>
/// <param name="state">
/// Supplies the segment name, the directory to write into, the document count,
/// and the flushed-files collection that receives the norms file name.
/// </param>
public override void Flush(Support.Dictionary<InvertedDocEndConsumerPerThread, IList<InvertedDocEndConsumerPerField>> threadsAndFields, SegmentWriteState state)
{
    Support.Dictionary<FieldInfo, IList<NormsWriterPerField>> byField = new Support.Dictionary<FieldInfo, IList<NormsWriterPerField>>();

    // Typically, each thread will have encountered the same
    // field.  So first we collate by field, ie, all
    // per-thread field instances that correspond to the
    // same FieldInfo
    foreach (KeyValuePair<InvertedDocEndConsumerPerThread, IList<InvertedDocEndConsumerPerField>> entry in threadsAndFields)
    {
        IList<InvertedDocEndConsumerPerField> threadFields = entry.Value;
        List<NormsWriterPerField> fieldsToRemove = new List<NormsWriterPerField>();

        // foreach disposes the enumerator; the iteration variable applies the
        // same explicit cast the element previously received by hand.
        foreach (NormsWriterPerField perField in threadFields)
        {
            if (perField.upto > 0)
            {
                // It has some norms. The null check relies on the indexer
                // yielding null for a FieldInfo that has not been added yet.
                IList<NormsWriterPerField> sameField = byField[perField.fieldInfo];
                if (sameField == null)
                {
                    sameField = new List<NormsWriterPerField>();
                    byField[perField.fieldInfo] = sameField;
                }
                sameField.Add(perField);
            }
            else
            {
                // Remove this field since we haven't seen it
                // since the previous flush
                fieldsToRemove.Add(perField);
            }
        }

        // Removal is deferred until enumeration has finished so the list is
        // never modified while it is being iterated.
        for (int i = 0; i < fieldsToRemove.Count; i++)
        {
            threadFields.Remove(fieldsToRemove[i]);
        }
    }

    string normsFileName = state.segmentName + "." + IndexFileNames.NORMS_EXTENSION;
    state.flushedFiles.Add(normsFileName);
    IndexOutput normsOut = state.directory.CreateOutput(normsFileName);

    try
    {
        normsOut.WriteBytes(SegmentMerger.NORMS_HEADER, 0, SegmentMerger.NORMS_HEADER.Length);

        int numField = fieldInfos.Size();
        int normCount = 0;

        for (int fieldNumber = 0; fieldNumber < numField; fieldNumber++)
        {
            FieldInfo fieldInfo = fieldInfos.FieldInfo(fieldNumber);

            // Stay on the generic interface: the previous downcast to the
            // non-generic IList only succeeded for list types that happen to
            // implement both interfaces.
            IList<NormsWriterPerField> toMerge = byField[fieldInfo];

            int upto = 0;

            if (toMerge != null)
            {
                int numFields = toMerge.Count;
                normCount++;

                NormsWriterPerField[] pending = new NormsWriterPerField[numFields];
                int[] uptos = new int[numFields];
                toMerge.CopyTo(pending, 0);

                int numLeft = numFields;

                while (numLeft > 0)
                {
                    System.Diagnostics.Debug.Assert(uptos[0] < pending[0].docIDs.Length, " uptos[0]=" + uptos[0] + " len=" + (pending[0].docIDs.Length));

                    // Pick the still-active instance whose next docID is smallest.
                    int minLoc = 0;
                    int minDocID = pending[0].docIDs[uptos[0]];

                    for (int j = 1; j < numLeft; j++)
                    {
                        int docID = pending[j].docIDs[uptos[j]];
                        if (docID < minDocID)
                        {
                            minDocID = docID;
                            minLoc = j;
                        }
                    }

                    System.Diagnostics.Debug.Assert(minDocID < state.numDocs);

                    // Fill hole
                    for (; upto < minDocID; upto++)
                    {
                        normsOut.WriteByte(defaultNorm);
                    }

                    normsOut.WriteByte(pending[minLoc].norms[uptos[minLoc]]);
                    uptos[minLoc]++;
                    upto++;

                    if (uptos[minLoc] == pending[minLoc].upto)
                    {
                        // This instance is exhausted: reset it and fill its slot
                        // with the last active one so [0, numLeft) stays dense.
                        pending[minLoc].Reset();
                        if (minLoc != numLeft - 1)
                        {
                            pending[minLoc] = pending[numLeft - 1];
                            uptos[minLoc] = uptos[numLeft - 1];
                        }
                        numLeft--;
                    }
                }

                // Fill final hole with defaultNorm
                for (; upto < state.numDocs; upto++)
                {
                    normsOut.WriteByte(defaultNorm);
                }
            }
            else if (fieldInfo.isIndexed && !fieldInfo.omitNorms)
            {
                normCount++;
                // Fill entire field with default norm:
                for (; upto < state.numDocs; upto++)
                {
                    normsOut.WriteByte(defaultNorm);
                }
            }

            System.Diagnostics.Debug.Assert(4 + normCount * state.numDocs == normsOut.GetFilePointer(), ".nrm file size mismatch: expected=" + (4 + normCount * state.numDocs) + " actual=" + normsOut.GetFilePointer());
        }
    }
    finally
    {
        normsOut.Close();
    }
}
/// <summary>Flush a new segment.</summary>
/// <param name="threadsAndFields">
/// Dictionary from each per-thread consumer to the list of that thread's
/// per-field consumers.
/// </param>
/// <param name="state">Segment write state handed to the implementation.</param>
internal abstract void Flush(Support.Dictionary <InvertedDocConsumerPerThread, System.Collections.Generic.IList <InvertedDocConsumerPerField> > threadsAndFields, SegmentWriteState state);
/// <summary>
/// Remaps all buffered deletes based on a completed merge.
/// </summary>
/// <remarks>
/// Each of the three buffered collections (terms, docIDs, queries) is rebuilt
/// with its document numbers passed through <paramref name="mapper"/>. All
/// replacements are built before any field is reassigned, and an empty
/// collection is left untouched.
/// </remarks>
/// <param name="mapper">Translates a pre-merge document number to its post-merge value.</param>
/// <param name="infos">Not read by this method body.</param>
/// <param name="docMaps">Not read by this method body.</param>
/// <param name="delCounts">Not read by this method body.</param>
/// <param name="merge">Not read by this method body.</param>
/// <param name="mergeDocCount">Not read by this method body.</param>
internal virtual void Remap(MergeDocIDRemapper mapper, SegmentInfos infos, int[][] docMaps, int[] delCounts, MergePolicy.OneMerge merge, int mergeDocCount)
{
    lock (this)
    {
        // Delete-by-term: same kind of dictionary as before (sorted or not).
        System.Collections.Generic.IDictionary<Term, Num> remappedTerms = null;
        if (terms.Count > 0)
        {
            remappedTerms = doTermSort
                ? new Support.Dictionary<Term, Num>(true)
                : new Support.Dictionary<Term, Num>();

            foreach (KeyValuePair<Term, Num> pair in terms)
            {
                Num oldNum = pair.Value;
                remappedTerms[pair.Key] = new Num(mapper.Remap(oldNum.GetNum()));
            }
        }

        // Delete-by-docID
        List<int> remappedDocIDs = null;
        if (docIDs.Count > 0)
        {
            remappedDocIDs = new List<int>(docIDs.Count);
            foreach (int oldDocID in docIDs)
            {
                remappedDocIDs.Add(mapper.Remap(oldDocID));
            }
        }

        // Delete-by-query
        Support.Dictionary<Query, int> remappedQueries = null;
        if (queries.Count > 0)
        {
            remappedQueries = new Support.Dictionary<Query, int>(queries.Count);
            foreach (KeyValuePair<Query, int> pair in queries)
            {
                int oldLimit = pair.Value;
                remappedQueries[pair.Key] = mapper.Remap(oldLimit);
            }
        }

        // Publish only the collections that were actually rebuilt.
        if (remappedTerms != null)
        {
            terms = remappedTerms;
        }

        if (remappedDocIDs != null)
        {
            docIDs = remappedDocIDs;
        }

        if (remappedQueries != null)
        {
            queries = remappedQueries;
        }
    }
}
/// <summary>
/// Init PhrasePositions in place.
/// There is a one-time initialization for this scorer:
/// <br/>- Put in repeats[] each pp that has another pp with the same position in the doc.
/// <br/>- Also mark each such pp by pp.repeats = true.
/// <br/>Later, termPositionsDiffer(pp) can consult repeats[], making that check efficient.
/// In particular, this allows scoring queries with no repetitions with no overhead due to this computation.
/// <br/>- Example 1 - query with no repetitions: "ho my"~2
/// <br/>- Example 2 - query with repetitions: "ho my my"~2
/// <br/>- Example 3 - query with repetitions: "my ho my"~2
/// <br/>Init per doc with repeats in the query includes advancing some repeating pp's to avoid false phrase detection.
/// </summary>
/// <returns>end (max position), or -1 if any term ran out (i.e. done)</returns>
/// <throws>IOException</throws>
private int InitPhrasePositions()
{
    int end = 0;

    // Repeats were already checked and none exist: the most common case and
    // also the simplest one. Position every pp and queue it in a single pass.
    if (checkedRepeats && repeats == null)
    {
        pq.Clear();
        for (PhrasePositions node = first; node != null; node = node.next)
        {
            node.FirstPosition();
            if (end < node.position)
            {
                end = node.position;
            }
            pq.Put(node);
        }
        return end;
    }

    // Position the pp's.
    for (PhrasePositions node = first; node != null; node = node.next)
    {
        node.FirstPosition();
    }

    // One-time initialization for this scorer: look for repeats.
    if (!checkedRepeats)
    {
        checkedRepeats = true;

        // Used purely as a set of the pp's that collide; created lazily.
        Support.Dictionary<PhrasePositions, Object> colliding = null;

        for (PhrasePositions outer = first; outer != null; outer = outer.next)
        {
            int outerTermPos = outer.position + outer.offset;

            for (PhrasePositions inner = outer.next; inner != null; inner = inner.next)
            {
                int innerTermPos = inner.position + inner.offset;
                if (innerTermPos != outerTermPos)
                {
                    continue;
                }

                if (colliding == null)
                {
                    colliding = new Support.Dictionary<PhrasePositions, object>();
                }

                outer.repeats = true;
                inner.repeats = true;
                colliding[outer] = null;
                colliding[inner] = null;
            }
        }

        if (colliding != null)
        {
            repeats = new PhrasePositions[colliding.Keys.Count];
            colliding.Keys.CopyTo(repeats, 0);
        }
    }

    // With repeats, some repeating pp's must be advanced so they all start
    // with differing tp's.
    if (repeats != null)
    {
        for (int i = 0; i < repeats.Length; i++)
        {
            PhrasePositions repeating = repeats[i];
            PhrasePositions clash = TermPositionsDiffer(repeating);

            while (clash != null)
            {
                // Out of pps that do not differ, advance the pp with higher offset.
                if (!clash.NextPosition())
                {
                    return -1; // ran out of a term -- done
                }
                clash = TermPositionsDiffer(repeating);
            }
        }
    }

    // Build the queue from the list.
    pq.Clear();
    for (PhrasePositions node = first; node != null; node = node.next)
    {
        if (end < node.position)
        {
            end = node.position;
        }
        pq.Put(node);
    }

    if (repeats != null)
    {
        tmpPos = new PhrasePositions[pq.Size()];
    }

    return end;
}
/// <summary>
/// Returns the value cached for <paramref name="key"/> under the reader's
/// field-cache key, creating it through <c>CreateValue</c> when the entry is
/// still a placeholder without a value.
/// </summary>
/// <remarks>
/// <c>readerCache</c> is locked only while the maps are read or written. A
/// <c>CreationPlaceholder</c> is stored for a missing entry, and that
/// placeholder is locked while the value is created and published.
/// </remarks>
/// <param name="reader">Reader whose <c>GetFieldCacheKey()</c> selects the inner cache.</param>
/// <param name="key">Entry identifying the value inside the inner cache.</param>
/// <returns>The cached value, or the placeholder's value once it has been created.</returns>
public virtual System.Object Get(IndexReader reader, Entry key)
{
    System.Object readerKey = reader.GetFieldCacheKey();
    IDictionary<Entry, Object> innerCache;
    System.Object cached;

    lock (readerCache)
    {
        innerCache = readerCache[readerKey];
        if (innerCache != null)
        {
            cached = innerCache[key];
        }
        else
        {
            // First request for this reader: start a fresh inner cache.
            innerCache = new Support.Dictionary<Entry, Object>();
            readerCache[readerKey] = innerCache;
            cached = null;
        }

        if (cached == null)
        {
            cached = new CreationPlaceholder();
            innerCache[key] = cached;
        }
    }

    // Anything that is not a placeholder is a finished value.
    if (!(cached is CreationPlaceholder))
    {
        return cached;
    }

    lock (cached)
    {
        CreationPlaceholder pending = (CreationPlaceholder) cached;
        if (pending.value_Renamed == null)
        {
            pending.value_Renamed = CreateValue(reader, key);
            lock (readerCache)
            {
                innerCache[key] = pending.value_Renamed;
            }

            // Only check if key.custom (the parser) is
            // non-null; else, we check twice for a single
            // call to FieldCache.getXXX
            if (key.custom != null && wrapper != null)
            {
                System.IO.StreamWriter infoStream = wrapper.GetInfoStream();
                if (infoStream != null)
                {
                    PrintNewInsanity(infoStream, pending.value_Renamed);
                }
            }
        }
        return pending.value_Renamed;
    }
}