Example #1
        /// <summary>Builds one thread-to-fields map keyed by each thread's <c>one</c> consumer and
        /// another keyed by its <c>two</c> consumer, then calls <c>one.Flush</c> and
        /// <c>two.Flush</c> with the matching map and the same <paramref name="state"/>.
        /// </summary>
        /// <param name="threadsAndFields">Per-thread consumers mapped to their per-field consumers.
        /// </param>
        /// <param name="state">Segment write state handed unchanged to both wrapped consumers.
        /// </param>
        public override void Flush(Support.Dictionary<DocFieldConsumerPerThread, IList<DocFieldConsumerPerField>> threadsAndFields, SegmentWriteState state)
            // NOTE(review): braces and the body of the per-field loop are missing from this
            // excerpt, so how oneFields/twoFields get populated cannot be confirmed here.

            Support.Dictionary<DocFieldConsumerPerThread, IList<DocFieldConsumerPerField>> oneThreadsAndFields = new Support.Dictionary<DocFieldConsumerPerThread, IList<DocFieldConsumerPerField>>();
            Support.Dictionary<DocFieldConsumerPerThread, IList<DocFieldConsumerPerField>> twoThreadsAndFields = new Support.Dictionary<DocFieldConsumerPerThread, IList<DocFieldConsumerPerField>>();

            // Enumerates a new dictionary constructed from threadsAndFields rather than the
            // argument itself (presumably a copy -- confirm the constructor semantics).
            foreach (KeyValuePair<DocFieldConsumerPerThread, IList<DocFieldConsumerPerField>> entry in new Support.Dictionary<DocFieldConsumerPerThread, IList<DocFieldConsumerPerField>>(threadsAndFields))
            	// The key is cast to the concrete type to reach its one/two members.
            	DocFieldConsumersPerThread perThread = (DocFieldConsumersPerThread) entry.Key;
				IList<DocFieldConsumerPerField> fields = entry.Value;

                //IEnumerator<DocFieldConsumerPerField> fieldsIt = fields.GetEnumerator();
                IList<DocFieldConsumerPerField> oneFields = new List<DocFieldConsumerPerField>();
                IList<DocFieldConsumerPerField> twoFields = new List<DocFieldConsumerPerField>();
                foreach (DocFieldConsumersPerField perField in fields)
				// Register this thread's two field lists under its two child consumers.
				oneThreadsAndFields[perThread.one] = oneFields;
				twoThreadsAndFields[perThread.two] = twoFields;
			// Flush each wrapped consumer with its own map.
			one.Flush(oneThreadsAndFields, state);
			two.Flush(twoThreadsAndFields, state);
Example #2
        /// <summary>Splits the incoming thread-to-fields map into a map for the <c>one</c>
        /// consumer and a map for the <c>two</c> consumer, then flushes each of them with
        /// its own map and the shared <paramref name="state"/>.
        /// </summary>
        /// <param name="threadsAndFields">Per-thread consumers mapped to their per-field consumers.
        /// </param>
        /// <param name="state">Segment write state handed unchanged to both wrapped consumers.
        /// </param>
        public override void Flush(Support.Dictionary <DocFieldConsumerPerThread, IList <DocFieldConsumerPerField> > threadsAndFields, SegmentWriteState state)
            // NOTE(review): braces and the body of the per-field loop are missing from this
            // excerpt, so how oneFields/twoFields get populated cannot be confirmed here.
            Support.Dictionary <DocFieldConsumerPerThread, IList <DocFieldConsumerPerField> > oneThreadsAndFields = new Support.Dictionary <DocFieldConsumerPerThread, IList <DocFieldConsumerPerField> >();
            Support.Dictionary <DocFieldConsumerPerThread, IList <DocFieldConsumerPerField> > twoThreadsAndFields = new Support.Dictionary <DocFieldConsumerPerThread, IList <DocFieldConsumerPerField> >();

            // Enumerates a new dictionary constructed from threadsAndFields rather than the
            // argument itself (presumably a copy -- confirm the constructor semantics).
            foreach (KeyValuePair <DocFieldConsumerPerThread, IList <DocFieldConsumerPerField> > entry in new Support.Dictionary <DocFieldConsumerPerThread, IList <DocFieldConsumerPerField> >(threadsAndFields))
                // The key is cast to the concrete type to reach its one/two members.
                DocFieldConsumersPerThread perThread = (DocFieldConsumersPerThread)entry.Key;

                IList <DocFieldConsumerPerField> fields = entry.Value;

                //IEnumerator<DocFieldConsumerPerField> fieldsIt = fields.GetEnumerator();
                IList <DocFieldConsumerPerField> oneFields = new List <DocFieldConsumerPerField>();
                IList <DocFieldConsumerPerField> twoFields = new List <DocFieldConsumerPerField>();
                foreach (DocFieldConsumersPerField perField in fields)

                // Register this thread's two field lists under its two child consumers.
                oneThreadsAndFields[perThread.one] = oneFields;
                twoThreadsAndFields[perThread.two] = twoFields;

            // Flush each wrapped consumer with its own map.
            one.Flush(oneThreadsAndFields, state);
            two.Flush(twoThreadsAndFields, state);
Example #3
        /// <summary>Builds a thread-to-fields map for <c>consumer</c> (keyed by each thread's
        /// <c>consumer</c> member) and one for <c>endConsumer</c> (keyed by each thread's
        /// <c>endConsumer</c> member), then flushes both with the same <paramref name="state"/>.
        /// </summary>
        /// <param name="threadsAndFields">Per-thread consumers mapped to their per-field consumers.
        /// </param>
        /// <param name="state">Segment write state handed unchanged to both child consumers.
        /// </param>
        public override void Flush(Support.Dictionary <DocFieldConsumerPerThread, IList <DocFieldConsumerPerField> > threadsAndFields, SegmentWriteState state)
            // NOTE(review): braces are missing from this excerpt and the per-field loop shows
            // only the cast, so how childFields/endChildFields are filled cannot be confirmed here.
            Support.Dictionary <InvertedDocConsumerPerThread, IList <InvertedDocConsumerPerField> >       childThreadsAndFields    = new Support.Dictionary <InvertedDocConsumerPerThread, IList <InvertedDocConsumerPerField> >();
            Support.Dictionary <InvertedDocEndConsumerPerThread, IList <InvertedDocEndConsumerPerField> > endChildThreadsAndFields = new Support.Dictionary <InvertedDocEndConsumerPerThread, IList <InvertedDocEndConsumerPerField> >();

            // Enumerates a new dictionary constructed from threadsAndFields rather than the
            // argument itself (presumably a copy -- confirm the constructor semantics).
            foreach (KeyValuePair <DocFieldConsumerPerThread, IList <DocFieldConsumerPerField> > entry in new Support.Dictionary <DocFieldConsumerPerThread, IList <DocFieldConsumerPerField> >(threadsAndFields))
                DocInverterPerThread perThread = (DocInverterPerThread)entry.Key;

                List <InvertedDocConsumerPerField>    childFields    = new List <InvertedDocConsumerPerField>();
                List <InvertedDocEndConsumerPerField> endChildFields = new List <InvertedDocEndConsumerPerField>();
                foreach (DocFieldConsumerPerField field in entry.Value)
                    DocInverterPerField perField = (DocInverterPerField)field;

                // Register this thread's lists under its two child consumers.
                childThreadsAndFields[perThread.consumer]       = childFields;
                endChildThreadsAndFields[perThread.endConsumer] = endChildFields;

            // Flush each child consumer with its own map.
            consumer.Flush(childThreadsAndFields, state);
            endConsumer.Flush(endChildThreadsAndFields, state);
Example #4
        /// <summary>Regroups the incoming per-thread fields into one map for <c>consumer</c>
        /// and one map for <c>endConsumer</c>, keyed by the matching member of each
        /// per-thread object, then flushes both with the same <paramref name="state"/>.
        /// </summary>
        /// <param name="threadsAndFields">Per-thread consumers mapped to their per-field consumers.
        /// </param>
        /// <param name="state">Segment write state handed unchanged to both child consumers.
        /// </param>
        public override void Flush(Support.Dictionary<DocFieldConsumerPerThread, IList<DocFieldConsumerPerField>> threadsAndFields, SegmentWriteState state)
            // NOTE(review): braces are missing from this excerpt and the per-field loop shows
            // only the cast, so how childFields/endChildFields are filled cannot be confirmed here.

            Support.Dictionary<InvertedDocConsumerPerThread, IList<InvertedDocConsumerPerField>> childThreadsAndFields = new Support.Dictionary<InvertedDocConsumerPerThread, IList<InvertedDocConsumerPerField>>();
            Support.Dictionary<InvertedDocEndConsumerPerThread, IList<InvertedDocEndConsumerPerField>> endChildThreadsAndFields = new Support.Dictionary<InvertedDocEndConsumerPerThread, IList<InvertedDocEndConsumerPerField>>();

            // Enumerates a new dictionary constructed from threadsAndFields rather than the
            // argument itself (presumably a copy -- confirm the constructor semantics).
            foreach (KeyValuePair<DocFieldConsumerPerThread, IList<DocFieldConsumerPerField>> entry in new Support.Dictionary<DocFieldConsumerPerThread, IList<DocFieldConsumerPerField>>(threadsAndFields))
                DocInverterPerThread perThread = (DocInverterPerThread)entry.Key;
                List<InvertedDocConsumerPerField> childFields = new List<InvertedDocConsumerPerField>();
                List<InvertedDocEndConsumerPerField> endChildFields = new List<InvertedDocEndConsumerPerField>();
                foreach (DocFieldConsumerPerField field in entry.Value)
                    DocInverterPerField perField = (DocInverterPerField)field;

                // Register this thread's lists under its two child consumers.
                childThreadsAndFields[perThread.consumer] = childFields;
                endChildThreadsAndFields[perThread.endConsumer] = endChildFields;
			// Flush each child consumer with its own map.
			consumer.Flush(childThreadsAndFields, state);
			endConsumer.Flush(endChildThreadsAndFields, state);
Example #5
            /// <summary>Looks up the value cached for <paramref name="key"/> under the given
            /// reader's cache key. When nothing is cached yet, a <c>CreationPlaceholder</c> is
            /// stored in its place; the real value is then produced by <c>CreateValue</c> while
            /// holding a lock on that placeholder and written back into the inner cache.
            /// </summary>
            /// <param name="reader">Reader whose <c>GetFieldCacheKey()</c> selects the inner cache.
            /// </param>
            /// <param name="key">Entry identifying the value inside the inner cache.
            /// </param>
            public virtual System.Object Get(IndexReader reader, Entry key)
                // NOTE(review): braces, else keywords and the return statements are missing
                // from this excerpt; the exact branch structure cannot be confirmed here.
                IDictionary <Entry, Object> innerCache;

                System.Object value_Renamed;
                System.Object readerKey = reader.GetFieldCacheKey();
                // All reads and writes of readerCache and the inner cache below happen
                // while holding the readerCache lock.
                lock (readerCache)
                    innerCache = readerCache[readerKey];
                    if (innerCache == null)
                        // First request for this reader: create and register its inner cache.
                        innerCache             = new Support.Dictionary <Entry, Object>();
                        readerCache[readerKey] = innerCache;
                        value_Renamed          = null;
                        // NOTE(review): presumably the else branch of the check above -- confirm.
                        value_Renamed = innerCache[key];
                    if (value_Renamed == null)
                        // Nothing cached yet: store a placeholder for this key.
                        value_Renamed   = new CreationPlaceholder();
                        innerCache[key] = value_Renamed;
                if (value_Renamed is CreationPlaceholder)
                    // Value creation is guarded by a lock on the placeholder instance itself.
                    lock (value_Renamed)
                        CreationPlaceholder progress = (CreationPlaceholder)value_Renamed;
                        if (progress.value_Renamed == null)
                            progress.value_Renamed = CreateValue(reader, key);
                            // Replace the placeholder in the inner cache with the created value.
                            lock (readerCache)
                                innerCache[key] = progress.value_Renamed;

                            // Only check if key.custom (the parser) is
                            // non-null; else, we check twice for a single
                            // call to FieldCache.getXXX
                            if (key.custom != null && wrapper != null)
                                System.IO.StreamWriter infoStream = wrapper.GetInfoStream();
                                if (infoStream != null)
                                    PrintNewInsanity(infoStream, progress.value_Renamed);
 /// <summary> Callback mechanism used by the TermVectorReader. Rejects vectors without
 /// positions, then resets the per-field state: a new <c>fieldToTerms</c> map, the
 /// <c>storeOffsets</c> flag, the current field name, and an empty positions map that is
 /// registered in <c>fieldToTerms</c> under <paramref name="field"/>.
 /// </summary>
 /// <param name="field"> The field being read; becomes <c>currentField</c>
 /// </param>
 /// <param name="numTerms">The number of terms in the vector; passed to the
 /// <c>fieldToTerms</c> constructor
 /// </param>
 /// <param name="storeOffsets">Whether offsets are available; stored in <c>this.storeOffsets</c>
 /// </param>
 /// <param name="storePositions">Whether positions are available; must be true
 /// </param>
 /// <exception cref="System.SystemException">If <paramref name="storePositions"/> is false.
 /// </exception>
 public override void  SetExpectations(System.String field, int numTerms, bool storeOffsets, bool storePositions)
     if (storePositions == false)
         throw new System.SystemException("You must store positions in order to use this Mapper");
     // NOTE(review): this branch contains only the comment below in this excerpt
     // (braces are missing); it appears to be intentionally empty -- confirm.
     if (storeOffsets == true)
         //ignoring offsets
     fieldToTerms               = new Support.Dictionary <string, Support.Dictionary <int, TVPositionInfo> >(numTerms);
     this.storeOffsets          = storeOffsets;
     currentField               = field;
     currentPositions           = new Support.Dictionary <int, TVPositionInfo>();
     fieldToTerms[currentField] = currentPositions;
Example #7
 /// <summary>Builds a fresh table mapping each supported value type to a new cache
 /// instance and assigns it to <c>caches</c>, all while holding a lock on this instance.
 /// </summary>
 private void  Init()
     lock (this)
         // The table is filled in a local first and assigned to the field only once complete.
         Support.Dictionary <Type, Cache> caches2 = new Support.Dictionary <Type, Cache>();
         caches2[typeof(sbyte)]              = new ByteCache(this);
         caches2[typeof(short)]              = new ShortCache(this);
         caches2[typeof(int)]                = new IntCache(this);
         caches2[typeof(float)]              = new FloatCache(this);
         caches2[typeof(long)]               = new LongCache(this);
         caches2[typeof(double)]             = new DoubleCache(this);
         caches2[typeof(string)]             = new StringCache(this);
         caches2[typeof(StringIndex)]        = new StringIndexCache(this);
         caches2[typeof(System.IComparable)] = new CustomCache(this);
         caches2[typeof(System.Object)]      = new AutoCache(this);
         caches = caches2;
		/// <summary>Creates one cache object per supported key type (sbyte, short, int, float,
		/// long, double, string, StringIndex, IComparable, Object), collects them in a new
		/// dictionary and assigns that dictionary to <c>caches</c> under a lock on this instance.
		/// </summary>
		private void  Init()
			lock (this)
                // The table is filled in a local first and assigned to the field only once complete.
                Support.Dictionary<Type, Cache> caches2 = new Support.Dictionary<Type, Cache>();
                caches2[typeof(sbyte)] = new ByteCache(this);
                caches2[typeof(short)] = new ShortCache(this);
                caches2[typeof(int)] = new IntCache(this);
                caches2[typeof(float)] = new FloatCache(this);
                caches2[typeof(long)] = new LongCache(this);
                caches2[typeof(double)] = new DoubleCache(this);
                caches2[typeof(string)] = new StringCache(this);
                caches2[typeof(StringIndex)] = new StringIndexCache(this);
                caches2[typeof(System.IComparable)] = new CustomCache(this);
                caches2[typeof(System.Object)] = new AutoCache(this);
                caches = caches2;
        /// <summary>Maps each per-thread processor's <c>consumer</c> to the result of its
        /// <c>Fields()</c> call, flushes <c>consumer</c> with that map, and afterwards writes
        /// <c>fieldInfos</c> to the segment's field-infos file.
        /// </summary>
        /// <param name="threads">Per-thread consumers; only the keys are enumerated here.
        /// </param>
        /// <param name="state">Segment write state; supplies the file name and target directory.
        /// </param>
        public override void Flush(IDictionary<DocConsumerPerThread, DocConsumerPerThread> threads, SegmentWriteState state)

            Support.Dictionary<DocFieldConsumerPerThread, IList<DocFieldConsumerPerField>> childThreadsAndFields = new Support.Dictionary<DocFieldConsumerPerThread, IList<DocFieldConsumerPerField>>();
            // The foreach casts every key to DocFieldProcessorPerThread.
            foreach (DocFieldProcessorPerThread perThread in threads.Keys)
                childThreadsAndFields[perThread.consumer] = perThread.Fields();
			consumer.Flush(childThreadsAndFields, state);
			// Important to save after asking consumer to flush so
			// consumer can alter the FieldInfo* if necessary.  EG,
			// FreqProxTermsWriter does this with
			// FieldInfo.storePayload.
			System.String fileName = state.SegmentFileName(IndexFileNames.FIELD_INFOS_EXTENSION);
			fieldInfos.Write(state.directory, fileName);
Example #10
        /// <summary> Create weight in multiple index scenario.
        /// Distributed query processing is done in the following steps:
        /// 1. rewrite query
        /// 2. extract necessary terms
        /// 3. collect dfs for these terms from the Searchables
        /// 4. create query weight using aggregate dfs.
        /// 5. distribute that weight to Searchables
        /// 6. merge results
        /// Steps 1-4 are done here, 5+6 in the search() methods
        /// </summary>
        /// <param name="original">The query to rewrite and weight.
        /// </param>
        /// <returns> rewritten queries
        /// </returns>
        public /*protected internal*/ override Weight CreateWeight(Query original)
            // NOTE(review): braces, the call that fills "terms", and the return statement
            // are missing from this excerpt.
            // step 1
            Query rewrittenQuery = Rewrite(original);

            // step 2
            Support.Set <Lucene.Net.Index.Term> terms = new Support.Set <Term>();

            // step 3
            // Copy the term set into an array so every searchable is queried with the same order.
            Term[] allTermsArray = new Term[terms.Count];
            int    index         = 0;

            foreach (Term t in terms)
                allTermsArray[index++] = t;

            // Sum the document frequency of each term over all searchables.
            int[] aggregatedDfs = new int[terms.Count];
            for (int i = 0; i < searchables.Length; i++)
                int[] dfs = searchables[i].DocFreqs(allTermsArray);
                for (int j = 0; j < aggregatedDfs.Length; j++)
                    aggregatedDfs[j] += dfs[j];

            // Term -> aggregated document frequency.
            IDictionary <Term, int> dfMap = new Support.Dictionary <Term, int>();

            for (int i = 0; i < allTermsArray.Length; i++)
                dfMap[allTermsArray[i]] = aggregatedDfs[i];

            // step 4
            int            numDocs  = MaxDoc();
            CachedDfSource cacheSim = new CachedDfSource(dfMap, numDocs, GetSimilarity());

        /// <summary>Flush step for term vectors, run under a lock on this instance. When
        /// <c>tvx</c> is set and <c>state.numDocsInStore</c> is positive, calls <c>Fill</c>
        /// with the doc-store count minus the doc-store offset; then visits every per-thread
        /// entry and its per-field consumers.
        /// </summary>
        /// <param name="threadsAndFields">Per-thread consumers mapped to their per-field consumers.
        /// </param>
        /// <param name="state">Segment write state; <c>numDocsInStore</c> is read from it.
        /// </param>
        public override void Flush(Support.Dictionary <TermsHashConsumerPerThread, IList <TermsHashConsumerPerField> > threadsAndFields, SegmentWriteState state)
            // NOTE(review): braces are missing from this excerpt and only the casts inside the
            // loops are visible; what is done with perField/perThread cannot be confirmed here.
            lock (this)
                // NOTE: it's possible that all documents seen in this segment
                // hit non-aborting exceptions, in which case we will
                // not have yet init'd the TermVectorsWriter.  This is
                // actually OK (unlike in the stored fields case)
                // because, although FieldInfos.hasVectors() will return
                // true, the TermVectorsReader gracefully handles
                // non-existence of the term vectors files.
                if (tvx != null)
                    if (state.numDocsInStore > 0)
                        // In case there are some final documents that we
                        // didn't see (because they hit a non-aborting exception):
                        Fill(state.numDocsInStore - docWriter.GetDocStoreOffset());


                foreach (KeyValuePair <TermsHashConsumerPerThread, IList <TermsHashConsumerPerField> > entry in threadsAndFields)
                    foreach (TermsHashConsumerPerField field in entry.Value)
                        TermVectorsTermsWriterPerField perField = (TermVectorsTermsWriterPerField)field;

                    TermVectorsTermsWriterPerThread perThread = (TermVectorsTermsWriterPerThread)entry.Key;
Example #12
 /// <summary>Sorts <paramref name="queryTerms"/> in place and derives the <c>terms</c>
 /// array and the <c>termFreqs</c> array from it, using a term-to-position map to
 /// detect repeated terms and bump their stored frequency.
 /// </summary>
 /// <param name="queryTerms">Terms to process; nothing happens inside the null check when
 /// this is null.
 /// </param>
 private void  ProcessTerms(System.String[] queryTerms)
     // NOTE(review): braces, an else keyword and the statements that append to
     // tmpList/tmpFreqs are missing from this excerpt.
     if (queryTerms != null)
         System.Array.Sort <string>(queryTerms);
         // term -> index of that term in tmpList/tmpFreqs.
         Support.Dictionary <string, int?> tmpSet = new Support.Dictionary <string, int?>(queryTerms.Length);
         //filter out duplicates
         List <String> tmpList  = new List <String>(queryTerms.Length);
         List <int>    tmpFreqs = new List <int>(queryTerms.Length);
         // j is the next free position handed out to a term not seen before.
         int           j        = 0;
         for (int i = 0; i < queryTerms.Length; i++)
             System.String term          = queryTerms[i];
             // Relies on the Support.Dictionary indexer yielding null for a missing key
             // (TODO confirm).
             System.Object temp_position = tmpSet[term];
             if (temp_position == null)
                 tmpSet[term] = j++;
                 // NOTE(review): presumably the else branch (term already seen) -- confirm.
                 int?position = tmpSet[term];
                 int integer  = tmpFreqs[position.Value];
                 tmpFreqs[position.Value] = (integer + 1);
         terms = tmpList.ToArray();
         //termFreqs = (int[])tmpFreqs.toArray(termFreqs);
         // Copy the frequency list element by element into a new int array.
         termFreqs = new int[tmpFreqs.Count];
         int i2 = 0;
         foreach (int integer in tmpFreqs)
             termFreqs[i2++] = integer;
		/// <summary> Callback mechanism used by the TermVectorReader. Throws when positions are
		/// not stored; otherwise replaces <c>fieldToTerms</c> with a new map, remembers
		/// <paramref name="storeOffsets"/> and <paramref name="field"/>, and registers a new
		/// empty positions map for that field.
		/// </summary>
		/// <param name="field"> The field being read; becomes <c>currentField</c>
		/// </param>
		/// <param name="numTerms">The number of terms in the vector; passed to the
		/// <c>fieldToTerms</c> constructor
		/// </param>
		/// <param name="storeOffsets">Whether offsets are available; stored in <c>this.storeOffsets</c>
		/// </param>
		/// <param name="storePositions">Whether positions are available; must be true
		/// </param>
		/// <exception cref="System.SystemException">If <paramref name="storePositions"/> is false.
		/// </exception>
		public override void  SetExpectations(System.String field, int numTerms, bool storeOffsets, bool storePositions)
			if (storePositions == false)
				throw new System.SystemException("You must store positions in order to use this Mapper");
			// NOTE(review): this branch contains only the comment below in this excerpt
			// (braces are missing); it appears to be intentionally empty -- confirm.
			if (storeOffsets == true)
				//ignoring offsets
            fieldToTerms = new Support.Dictionary<string, Support.Dictionary<int, TVPositionInfo>>(numTerms);
			this.storeOffsets = storeOffsets;
			currentField = field;
            currentPositions = new Support.Dictionary<int, TVPositionInfo>();
			fieldToTerms[currentField] = currentPositions;
Example #14
 /// <summary>Closes the store to future operations, releasing associated memory.
 /// Clears the <c>isOpen</c> flag and drops the reference to <c>fileMap</c>.
 /// </summary>
 public override void  Close()
     isOpen  = false;
     fileMap = null;
Example #15
        /// <summary>Flush step run under a lock on this instance. Builds a thread-to-fields map
        /// for <c>consumer</c> (keyed by each thread's <c>consumer</c>) and, when
        /// <c>nextTermsHash</c> is set, a second map keyed by each thread's
        /// <c>nextPerThread</c>; then flushes <c>consumer</c>, calls
        /// <c>ShrinkFreePostings</c>, and finally flushes <c>nextTermsHash</c> if present.
        /// </summary>
        /// <param name="threadsAndFields">Per-thread consumers mapped to their per-field consumers.
        /// </param>
        /// <param name="state">Segment write state passed on to every call made here.
        /// </param>
        internal override void Flush(Support.Dictionary <InvertedDocConsumerPerThread, IList <InvertedDocConsumerPerField> > threadsAndFields, SegmentWriteState state)
            // NOTE(review): braces, else keywords and most of the per-field loop body are
            // missing from this excerpt; how childFields/nextChildFields are filled cannot
            // be confirmed here.
            lock (this)
                Support.Dictionary <TermsHashConsumerPerThread, IList <TermsHashConsumerPerField> >     childThreadsAndFields = new Support.Dictionary <TermsHashConsumerPerThread, IList <TermsHashConsumerPerField> >();
                Support.Dictionary <InvertedDocConsumerPerThread, IList <InvertedDocConsumerPerField> > nextThreadsAndFields;

                if (nextTermsHash != null)
                    nextThreadsAndFields = new Support.Dictionary <InvertedDocConsumerPerThread, IList <InvertedDocConsumerPerField> >();
                    // NOTE(review): presumably the else branch (no next terms hash) -- confirm.
                    nextThreadsAndFields = null;

                foreach (KeyValuePair <InvertedDocConsumerPerThread, IList <InvertedDocConsumerPerField> > entry in threadsAndFields)
                    TermsHashPerThread perThread = (TermsHashPerThread)entry.Key;

                    IList <InvertedDocConsumerPerField> fields = entry.Value;

                    IEnumerator <InvertedDocConsumerPerField> fieldsIt    = fields.GetEnumerator();
                    List <TermsHashConsumerPerField>          childFields = new List <TermsHashConsumerPerField>();
                    List <InvertedDocConsumerPerField>        nextChildFields;

                    if (nextTermsHash != null)
                        nextChildFields = new List <InvertedDocConsumerPerField>();
                        // NOTE(review): presumably the else branch (no next terms hash) -- confirm.
                        nextChildFields = null;

                    while (fieldsIt.MoveNext())
                        TermsHashPerField perField = (TermsHashPerField)fieldsIt.Current;
                        if (nextTermsHash != null)

                    // Register this thread's lists under its child consumer(s).
                    childThreadsAndFields[perThread.consumer] = childFields;
                    if (nextTermsHash != null)
                        nextThreadsAndFields[perThread.nextPerThread] = nextChildFields;

                consumer.Flush(childThreadsAndFields, state);

                ShrinkFreePostings(threadsAndFields, state);

                // The secondary terms hash is flushed last, after the primary consumer.
                if (nextTermsHash != null)
                    nextTermsHash.Flush(nextThreadsAndFields, state);
Example #16
        /// <summary>Gathers the fields of every per-thread processor into a map keyed by that
        /// thread's <c>consumer</c>, flushes <c>consumer</c> with the map, and then writes
        /// <c>fieldInfos</c> to the segment's field-infos file.
        /// </summary>
        /// <param name="threads">Per-thread consumers; only the keys are enumerated here.
        /// </param>
        /// <param name="state">Segment write state; supplies the file name and target directory.
        /// </param>
        public override void Flush(IDictionary <DocConsumerPerThread, DocConsumerPerThread> threads, SegmentWriteState state)
            Support.Dictionary <DocFieldConsumerPerThread, IList <DocFieldConsumerPerField> > childThreadsAndFields = new Support.Dictionary <DocFieldConsumerPerThread, IList <DocFieldConsumerPerField> >();
            // The foreach casts every key to DocFieldProcessorPerThread.
            foreach (DocFieldProcessorPerThread perThread in threads.Keys)
                childThreadsAndFields[perThread.consumer] = perThread.Fields();
            consumer.Flush(childThreadsAndFields, state);

            // Important to save after asking consumer to flush so
            // consumer can alter the FieldInfo* if necessary.  EG,
            // FreqProxTermsWriter does this with
            // FieldInfo.storePayload.
            System.String fileName = state.SegmentFileName(IndexFileNames.FIELD_INFOS_EXTENSION);
            fieldInfos.Write(state.directory, fileName);
Example #17
 /// <summary>Called when DocumentsWriter decides to create a new
 /// segment
 /// </summary>
 /// <param name="threadsAndFields">Per-thread consumers mapped to their list of per-field consumers.
 /// </param>
 /// <param name="state">Write state of the segment being created.
 /// </param>
 public abstract void Flush(Support.Dictionary <DocFieldConsumerPerThread, IList <DocFieldConsumerPerField> > threadsAndFields, SegmentWriteState state);
		/// <summary>Closes the store to future operations, releasing associated memory.
		/// Sets <c>isOpen</c> to false and sets <c>fileMap</c> to null.
		/// </summary>
		public override void  Close()
			isOpen = false;
			fileMap = null;
Example #19
 /// <summary>Creates a cache backed by the supplied dictionary. The reference is stored
 /// as-is; no copy is made.
 /// </summary>
 /// <param name="map">Dictionary used as the backing store.
 /// </param>
 public SimpleMapCache(Support.Dictionary <K, V> map)
     this.map = map;
Example #20
		// Remaps all buffered deletes based on a completed
		// merge.  Runs under a lock on this instance.  For each of the three
		// delete collections (terms, docIDs, queries) a remapped replacement is
		// built when the collection is non-empty, and the field is swapped to
		// the replacement only when one was built.
		// NOTE(review): braces, else keywords and the body of the docID loop
		// are missing from this excerpt.  In the visible lines only "mapper" is
		// used; infos, docMaps, delCounts, merge and mergeDocCount are not.
		internal virtual void  Remap(MergeDocIDRemapper mapper, SegmentInfos infos, int[][] docMaps, int[] delCounts, MergePolicy.OneMerge merge, int mergeDocCount)
			lock (this)

                System.Collections.Generic.IDictionary<Term, Num> newDeleteTerms;
				// Remap delete-by-term
				if (terms.Count > 0)
                    // NOTE(review): the two constructions below look like an if/else on
                    // doTermSort; the meaning of the "true" argument is not visible here.
                    if (doTermSort)
                        newDeleteTerms = new Support.Dictionary<Term, Num>(true);
                        newDeleteTerms = new Support.Dictionary<Term, Num>();
                    foreach (KeyValuePair<Term, Num> entry in terms)
                        Num num = entry.Value;
                        // Keep the term, replace its number with the remapped one.
                        newDeleteTerms[entry.Key] = new Num(mapper.Remap(num.GetNum()));
					// NOTE(review): presumably the else branch (nothing to remap) -- confirm.
					newDeleteTerms = null;
				// Remap delete-by-docID
				List<int> newDeleteDocIDs;
				if (docIDs.Count > 0)
					newDeleteDocIDs = new List<int>(docIDs.Count);
                    foreach(int num in docIDs)
					// NOTE(review): presumably the else branch (nothing to remap) -- confirm.
					newDeleteDocIDs = null;
				// Remap delete-by-query
                Support.Dictionary<Query, int> newDeleteQueries;
				if (queries.Count > 0)
                    newDeleteQueries = new Support.Dictionary<Query, int>(queries.Count);
                    foreach(KeyValuePair<Query,int> entry in queries)
                        int num = entry.Value;
                        newDeleteQueries[entry.Key] = mapper.Remap(num);
					// NOTE(review): presumably the else branch (nothing to remap) -- confirm.
					newDeleteQueries = null;
				// Swap in only the collections that were actually rebuilt.
				if (newDeleteTerms != null)
					terms = newDeleteTerms;
				if (newDeleteDocIDs != null)
					docIDs = newDeleteDocIDs;
				if (newDeleteQueries != null)
					queries = newDeleteQueries;
Example #21
		/// <summary> Sets the date resolution used by RangeQueries for a specific field.
		/// The per-field map is created on first use and the entry for
		/// <paramref name="fieldName"/> is added or overwritten.
		/// </summary>
		/// <param name="fieldName">field for which the date resolution is to be set; must not be null
		/// </param>
		/// <param name="dateResolution">date resolution to set
		/// </param>
		/// <exception cref="System.ArgumentException">If <paramref name="fieldName"/> is null.
		/// </exception>
		public virtual void  SetDateResolution(System.String fieldName, DateTools.Resolution dateResolution)
			if (fieldName == null)
				throw new System.ArgumentException("Field cannot be null.");
			if (fieldToDateResolution == null)
				// lazily initialize the field-to-resolution dictionary
                fieldToDateResolution = new Support.Dictionary<string, DateTools.Resolution>();
			fieldToDateResolution[fieldName] = dateResolution;
Example #22
		/// <summary>Produce _X.nrm if any document had a field with norms
		/// not disabled.  First groups the per-thread field writers by their
		/// FieldInfo, then creates the norms output, writes the norms header and
		/// walks every field number, merging the per-thread writers of a field in
		/// ascending docID order.
		/// </summary>
		/// <param name="threadsAndFields">Per-thread end consumers mapped to their per-field consumers.
		/// </param>
		/// <param name="state">Segment write state; supplies segmentName, directory and numDocs.
		/// </param>
        public override void Flush(Support.Dictionary<InvertedDocEndConsumerPerThread, IList<InvertedDocEndConsumerPerField>> threadsAndFields, SegmentWriteState state)
            // NOTE(review): most braces and a number of statements (list adds, norm writes,
            // hole filling, try/finally around the output) are missing from this excerpt, so
            // only the visible lines are described below.

            Support.Dictionary<FieldInfo, IList<NormsWriterPerField>> byField = new Support.Dictionary<FieldInfo, IList<NormsWriterPerField>>();
			// Typically, each thread will have encountered the same
			// field.  So first we collate by field, ie, all
			// per-thread field instances that correspond to the
			// same FieldInfo
			foreach(KeyValuePair<InvertedDocEndConsumerPerThread,IList<InvertedDocEndConsumerPerField>> entry in threadsAndFields) {

                IList<InvertedDocEndConsumerPerField> fields = entry.Value;
                IEnumerator<InvertedDocEndConsumerPerField> fieldsIt = fields.GetEnumerator();
                List<NormsWriterPerField> fieldsToRemove = new List<NormsWriterPerField>();
				while (fieldsIt.MoveNext())
                    NormsWriterPerField perField = (NormsWriterPerField)fieldsIt.Current;
					if (perField.upto > 0)
						// It has some norms
                        // Look up (or create) the list of writers for this FieldInfo.
                        IList<NormsWriterPerField> l = byField[perField.fieldInfo];
						if (l == null)
                            l = new List<NormsWriterPerField>();
							byField[perField.fieldInfo] = l;
					// Remove this field since we haven't seen it
					// since the previous flush

                var fieldsHT = fields;
                for (int i = 0; i < fieldsToRemove.Count; i++)
			// The norms file is named <segmentName>.<NORMS_EXTENSION>.
			System.String normsFileName = state.segmentName + "." + IndexFileNames.NORMS_EXTENSION;
			IndexOutput normsOut = state.directory.CreateOutput(normsFileName);
				normsOut.WriteBytes(SegmentMerger.NORMS_HEADER, 0, SegmentMerger.NORMS_HEADER.Length);
				int numField = fieldInfos.Size();
				int normCount = 0;
				for (int fieldNumber = 0; fieldNumber < numField; fieldNumber++)
					FieldInfo fieldInfo = fieldInfos.FieldInfo(fieldNumber);
					System.Collections.IList toMerge = (System.Collections.IList) byField[fieldInfo];
					// upto tracks the next docID position handled for this field.
					int upto = 0;
					if (toMerge != null)
						int numFields = toMerge.Count;
						NormsWriterPerField[] fields = new NormsWriterPerField[numFields];
						// uptos[j] is the read cursor into fields[j].docIDs.
						int[] uptos = new int[numFields];
						for (int j = 0; j < numFields; j++)
							fields[j] = (NormsWriterPerField) toMerge[j];
						// numLeft counts the writers that still have entries to merge.
						int numLeft = numFields;
						while (numLeft > 0)
							System.Diagnostics.Debug.Assert(uptos [0] < fields [0].docIDs.Length, " uptos[0]=" + uptos [0] + " len=" +(fields [0].docIDs.Length));
							// Find the writer whose current docID is the smallest.
							int minLoc = 0;
							int minDocID = fields[0].docIDs[uptos[0]];
							for (int j = 1; j < numLeft; j++)
								int docID = fields[j].docIDs[uptos[j]];
								if (docID < minDocID)
									minDocID = docID;
									minLoc = j;
							System.Diagnostics.Debug.Assert(minDocID < state.numDocs);
							// Fill hole
							for (; upto < minDocID; upto++)
							// When a writer is exhausted, move the last live writer into its slot.
							if (uptos[minLoc] == fields[minLoc].upto)
								if (minLoc != numLeft - 1)
									fields[minLoc] = fields[numLeft - 1];
									uptos[minLoc] = uptos[numLeft - 1];
						// Fill final hole with defaultNorm
						for (; upto < state.numDocs; upto++)
					else if (fieldInfo.isIndexed && !fieldInfo.omitNorms)
						// Fill entire field with default norm:
						for (; upto < state.numDocs; upto++)
					// Sanity check on the output size; the constant 4 is presumably the
					// length of NORMS_HEADER (TODO confirm).
					System.Diagnostics.Debug.Assert(4 + normCount * state.numDocs == normsOut.GetFilePointer(), ".nrm file size mismatch: expected=" +(4 + normCount * state.numDocs) + " actual=" + normsOut.GetFilePointer());
		/// <summary> Create weight in multiple index scenario.
		/// Distributed query processing is done in the following steps:
		/// 1. rewrite query
		/// 2. extract necessary terms
		/// 3. collect dfs for these terms from the Searchables
		/// 4. create query weight using aggregate dfs.
		/// 5. distribute that weight to Searchables
		/// 6. merge results
		/// Steps 1-4 are done here, 5+6 in the search() methods
		/// </summary>
		/// <param name="original">The query to rewrite and weight.
		/// </param>
		/// <returns> the weight of the rewritten query, created with a CachedDfSource
		/// built from the aggregated document frequencies
		/// </returns>
		public /*protected internal*/ override Weight CreateWeight(Query original)
			// NOTE(review): braces and the call that fills "terms" are missing
			// from this excerpt.
			// step 1
			Query rewrittenQuery = Rewrite(original);
			// step 2
            Support.Set<Lucene.Net.Index.Term> terms = new Support.Set<Term>();
			// step 3
			// Copy the term set into an array so every searchable is queried with the same order.
			Term[] allTermsArray = new Term[terms.Count];
            int index = 0;
            foreach (Term t in terms)
                allTermsArray[index++] = t;
            // Sum the document frequency of each term over all searchables.
            int[] aggregatedDfs = new int[terms.Count];
			for (int i = 0; i < searchables.Length; i++)
				int[] dfs = searchables[i].DocFreqs(allTermsArray);
				for (int j = 0; j < aggregatedDfs.Length; j++)
					aggregatedDfs[j] += dfs[j];

            // Term -> aggregated document frequency.
            IDictionary<Term, int> dfMap = new Support.Dictionary<Term, int>();
			for (int i = 0; i < allTermsArray.Length; i++)
				dfMap[allTermsArray[i]] = aggregatedDfs[i];
			// step 4
			int numDocs = MaxDoc();
			CachedDfSource cacheSim = new CachedDfSource(dfMap, numDocs, GetSimilarity());
			return rewrittenQuery.Weight(cacheSim);
Example #24
		/// <summary>Flush all pending docs to a new segment.  Runs under a lock on this
		/// instance: asserts that there are buffered docs and nothing waiting, records the
		/// doc-store offset, hands every thread state's consumer to <c>consumer.Flush</c>,
		/// and adds the flushed doc count to <c>flushedDocCount</c>.
		/// </summary>
		/// <param name="closeDocStore">When true, the doc-store segment name is asserted to be
		/// set and <c>flushState.numDocsInStore</c> is reset to 0.
		/// </param>
		/// <returns><c>flushState.numDocs</c>
		/// </returns>
		internal int Flush(bool closeDocStore)
			// NOTE(review): braces, the try/finally scaffolding around "success" and
			// some statements are missing from this excerpt.
			lock (this)
				System.Diagnostics.Debug.Assert(numDocsInRAM > 0);
				System.Diagnostics.Debug.Assert(nextDocID == numDocsInRAM);
				System.Diagnostics.Debug.Assert(waitQueue.numWaiting == 0);
				System.Diagnostics.Debug.Assert(waitQueue.waitingBytes == 0);
				docStoreOffset = numDocsInStore;
				if (infoStream != null)
					Message("flush postings as segment " + flushState.segmentName + " numDocs=" + numDocsInRAM);
				// Set to true only after the flush work below has completed.
				bool success = false;
					if (closeDocStore)
						System.Diagnostics.Debug.Assert(flushState.docStoreSegmentName != null);
						flushState.numDocsInStore = 0;
					// Each thread state's consumer is stored as both key and value.
					IDictionary<DocConsumerPerThread,DocConsumerPerThread> threads = new Support.Dictionary<DocConsumerPerThread,DocConsumerPerThread>();
					for (int i = 0; i < threadStates.Length; i++)
						threads[threadStates[i].consumer] = threadStates[i].consumer;
					consumer.Flush(threads, flushState);
					// Diagnostics only: compare the RAM used with the size of the flushed segment.
					if (infoStream != null)
                        SegmentInfo si = new SegmentInfo(flushState.segmentName, flushState.numDocs, directory);
                        long newSegmentSize = si.SizeInBytes();
                        System.String message = System.String.Format(nf, "  oldRAMSize={0:d} newFlushedSize={1:d} docs/MB={2:f} new/old={3:%}",
                            new System.Object[] { numBytesUsed, newSegmentSize, (numDocsInRAM / (newSegmentSize / 1024.0 / 1024.0)), (100.0 * newSegmentSize / numBytesUsed) });
					flushedDocCount += flushState.numDocs;
					success = true;
					// NOTE(review): the failure handling that follows this check is not visible here.
					if (!success)
				System.Diagnostics.Debug.Assert(waitQueue.waitingBytes == 0);
				return flushState.numDocs;
Example #25
        // TODO: would be nice to factor out more of this, eg the
        // FreqProxFieldMergeState, and code to visit all Fields
        // under the same FieldInfo together, up into TermsHash*.
        // Other writers would presumably share a lot of this...

        /// <summary>Collects the per-field writers from all threads, then walks them in runs
        /// that share the same field name: each run is copied into an array, the run's
        /// <c>hasPayloads</c> flags are OR-ed into <c>fieldInfo.storePayloads</c>, and the
        /// array is passed to <c>AppendPostings</c> together with a newly created
        /// <c>FormatPostingsFieldsWriter</c>.
        /// </summary>
        /// <param name="threadsAndFields">Per-thread consumers mapped to their per-field consumers.
        /// </param>
        /// <param name="state">Segment write state used to construct the postings writer.
        /// </param>
        public override void Flush(Support.Dictionary <TermsHashConsumerPerThread, IList <TermsHashConsumerPerField> > threadsAndFields, SegmentWriteState state)
            // NOTE(review): braces, the sort call, the block-comment delimiters around the
            // "writer chain" note and several loop bodies are missing from this excerpt.
            // Gather all FieldData's that have postings, across all
            // ThreadStates
            List <FreqProxTermsWriterPerField> allFields = new List <FreqProxTermsWriterPerField>();

            foreach (KeyValuePair <TermsHashConsumerPerThread, IList <TermsHashConsumerPerField> > entry in threadsAndFields)
                IList <TermsHashConsumerPerField> fields = entry.Value;
                foreach (TermsHashConsumerPerField i in fields)
                    FreqProxTermsWriterPerField perField = (FreqProxTermsWriterPerField)i;
                    if (perField.termsHashPerField.numPostings > 0)

            // Sort by field name
            int numAllFields = allFields.Count;

            // TODO: allow Lucene user to customize this consumer:
            FormatPostingsFieldsConsumer consumer = new FormatPostingsFieldsWriter(state, fieldInfos);

             * Current writer chain:
             * FormatPostingsFieldsConsumer
             * -> IMPL: FormatPostingsFieldsWriter
             * -> FormatPostingsTermsConsumer
             * -> IMPL: FormatPostingsTermsWriter
             * -> FormatPostingsDocConsumer
             * -> IMPL: FormatPostingsDocWriter
             * -> FormatPostingsPositionsConsumer
             * -> IMPL: FormatPostingsPositionsWriter

            // start marks the first entry of the current same-named run in allFields.
            int start = 0;

            while (start < numAllFields)
                FieldInfo     fieldInfo = ((FreqProxTermsWriterPerField)allFields[start]).fieldInfo;
                System.String fieldName = fieldInfo.name;

                // end is advanced past every following entry with the same field name
                // (the loop body is not visible in this excerpt).
                int end = start + 1;
                while (end < numAllFields && ((FreqProxTermsWriterPerField)allFields[end]).fieldInfo.name.Equals(fieldName))

                // Copy the run [start, end) into its own array.
                FreqProxTermsWriterPerField[] fields = new FreqProxTermsWriterPerField[end - start];
                for (int i = start; i < end; i++)
                    fields[i - start] = (FreqProxTermsWriterPerField)allFields[i];

                    // Aggregate the storePayload as seen by the same
                    // field across multiple threads
                    fieldInfo.storePayloads |= fields[i - start].hasPayloads;

                // If this field has postings then add them to the
                // segment
                AppendPostings(fields, consumer);

                for (int i = 0; i < fields.Length; i++)
                    TermsHashPerField perField = fields[i].termsHashPerField;
                    int numPostings            = perField.numPostings;

                // Continue with the next run of fields.
                start = end;

            foreach (KeyValuePair <TermsHashConsumerPerThread, IList <TermsHashConsumerPerField> > entry in threadsAndFields)
                FreqProxTermsWriterPerThread perThread = (FreqProxTermsWriterPerThread)entry.Key;

Example #26
        /// <summary>
        /// Regroups the incoming thread/field map into a map keyed by each thread's
        /// consumer (and, when <c>nextTermsHash</c> is set, a second map keyed by
        /// <c>nextPerThread</c>), then flushes the consumer, calls
        /// <c>ShrinkFreePostings</c> and finally flushes <c>nextTermsHash</c>.
        /// The whole operation runs under <c>lock (this)</c>.
        /// NOTE(review): braces, the <c>else</c> keywords and the statements that fill
        /// the child lists are not visible in this listing.
        /// </summary>
        /// <param name="threadsAndFields">Per-thread consumers mapped to their per-field consumers; keys are cast to TermsHashPerThread and fields to TermsHashPerField.</param>
        /// <param name="state">Segment write state forwarded to every nested call.</param>
        internal override void Flush(Support.Dictionary<InvertedDocConsumerPerThread, IList<InvertedDocConsumerPerField>> threadsAndFields, SegmentWriteState state)
			lock (this)
                // Map handed to consumer.Flush below
                Support.Dictionary<TermsHashConsumerPerThread, IList<TermsHashConsumerPerField>> childThreadsAndFields = new Support.Dictionary<TermsHashConsumerPerThread, IList<TermsHashConsumerPerField>>();
				// Map handed to nextTermsHash.Flush; only allocated when a next terms hash exists
				Support.Dictionary<InvertedDocConsumerPerThread,IList<InvertedDocConsumerPerField>> nextThreadsAndFields;
				if (nextTermsHash != null)
                    nextThreadsAndFields = new Support.Dictionary<InvertedDocConsumerPerThread, IList<InvertedDocConsumerPerField>>();
					// (presumably the else branch; the keyword is missing from this listing)
					nextThreadsAndFields = null;

                foreach (KeyValuePair<InvertedDocConsumerPerThread, IList<InvertedDocConsumerPerField>> entry in threadsAndFields)
					TermsHashPerThread perThread = (TermsHashPerThread) entry.Key;

                    IList<InvertedDocConsumerPerField> fields = entry.Value;
					IEnumerator<InvertedDocConsumerPerField> fieldsIt = fields.GetEnumerator();
                    List<TermsHashConsumerPerField> childFields = new List<TermsHashConsumerPerField>();
					List<InvertedDocConsumerPerField> nextChildFields;
					if (nextTermsHash != null)
                        nextChildFields = new List<InvertedDocConsumerPerField>();
						// (presumably the else branch; the keyword is missing from this listing)
						nextChildFields = null;
					// Visit every per-field instance of this thread; the statements that
					// add to childFields / nextChildFields are not visible here.
					while (fieldsIt.MoveNext())
						TermsHashPerField perField = (TermsHashPerField) fieldsIt.Current;
						if (nextTermsHash != null)
					// Register this thread's lists under the matching child keys
					childThreadsAndFields[perThread.consumer] = childFields;
					if (nextTermsHash != null)
						nextThreadsAndFields[perThread.nextPerThread] = nextChildFields;
				// Flush order: consumer, then shrink free postings, then the next terms hash
				consumer.Flush(childThreadsAndFields, state);
				ShrinkFreePostings(threadsAndFields, state);
				if (nextTermsHash != null)
					nextTermsHash.Flush(nextThreadsAndFields, state);
Example #27
        /// <summary>Produce _X.nrm if any document had a field with norms
        /// not disabled.
        /// Per-thread field instances are first collated by FieldInfo, then for each
        /// field number the per-thread docID streams are merged in ascending docID
        /// order into the norms output.
        /// NOTE(review): braces and several statements (writes, counter updates,
        /// loop bodies, any try/finally) are not visible in this listing.
        /// </summary>
        /// <param name="threadsAndFields">Per-thread consumers mapped to their per-field consumers; fields are cast to NormsWriterPerField.</param>
        /// <param name="state">Supplies segmentName, directory and numDocs.</param>
        public override void Flush(Support.Dictionary <InvertedDocEndConsumerPerThread, IList <InvertedDocEndConsumerPerField> > threadsAndFields, SegmentWriteState state)
            Support.Dictionary <FieldInfo, IList <NormsWriterPerField> > byField = new Support.Dictionary <FieldInfo, IList <NormsWriterPerField> >();

            // Typically, each thread will have encountered the same
            // field.  So first we collate by field, ie, all
            // per-thread field instances that correspond to the
            // same FieldInfo
            foreach (KeyValuePair <InvertedDocEndConsumerPerThread, IList <InvertedDocEndConsumerPerField> > entry in threadsAndFields)
                IList <InvertedDocEndConsumerPerField>       fields   = entry.Value;
                IEnumerator <InvertedDocEndConsumerPerField> fieldsIt = fields.GetEnumerator();
                List <NormsWriterPerField> fieldsToRemove             = new List <NormsWriterPerField>();

                while (fieldsIt.MoveNext())
                    NormsWriterPerField perField = (NormsWriterPerField)fieldsIt.Current;

                    if (perField.upto > 0)
                        // It has some norms
                        // NOTE(review): the null test below assumes the Support.Dictionary
                        // indexer yields null for a missing key rather than throwing - confirm.
                        IList <NormsWriterPerField> l = byField[perField.fieldInfo];
                        if (l == null)
                            l = new List <NormsWriterPerField>();
                            byField[perField.fieldInfo] = l;
                    // Remove this field since we haven't seen it
                    // since the previous flush

                // Removal pass over fieldsToRemove; the loop body is not visible here
                var fieldsHT = fields;
                for (int i = 0; i < fieldsToRemove.Count; i++)

            // Output file name is "<segmentName>.<NORMS_EXTENSION>"
            System.String normsFileName = state.segmentName + "." + IndexFileNames.NORMS_EXTENSION;
            IndexOutput normsOut = state.directory.CreateOutput(normsFileName);

                // The file starts with the fixed norms header
                normsOut.WriteBytes(SegmentMerger.NORMS_HEADER, 0, SegmentMerger.NORMS_HEADER.Length);

                int numField = fieldInfos.Size();

                int normCount = 0;

                // Process fields in field-number order
                for (int fieldNumber = 0; fieldNumber < numField; fieldNumber++)
                    FieldInfo fieldInfo = fieldInfos.FieldInfo(fieldNumber);

                    System.Collections.IList toMerge = (System.Collections.IList)byField[fieldInfo];
                    // upto = next docID position to be written for this field
                    int upto = 0;
                    if (toMerge != null)
                        int numFields = toMerge.Count;


                        NormsWriterPerField[] fields = new NormsWriterPerField[numFields];
                        // uptos[j] = read cursor into fields[j].docIDs
                        int[] uptos = new int[numFields];

                        for (int j = 0; j < numFields; j++)
                            fields[j] = (NormsWriterPerField)toMerge[j];

                        // Number of per-thread streams that still have entries
                        int numLeft = numFields;

                        while (numLeft > 0)
                            System.Diagnostics.Debug.Assert(uptos [0] < fields [0].docIDs.Length, " uptos[0]=" + uptos [0] + " len=" + (fields [0].docIDs.Length));

                            // Pick the stream whose current docID is smallest
                            int minLoc   = 0;
                            int minDocID = fields[0].docIDs[uptos[0]];

                            for (int j = 1; j < numLeft; j++)
                                int docID = fields[j].docIDs[uptos[j]];
                                if (docID < minDocID)
                                    minDocID = docID;
                                    minLoc   = j;

                            System.Diagnostics.Debug.Assert(minDocID < state.numDocs);

                            // Fill hole
                            for (; upto < minDocID; upto++)


                            // Stream exhausted: move the last live stream into its slot
                            // (the decrement of numLeft is not visible in this listing)
                            if (uptos[minLoc] == fields[minLoc].upto)
                                if (minLoc != numLeft - 1)
                                    fields[minLoc] = fields[numLeft - 1];
                                    uptos[minLoc]  = uptos[numLeft - 1];

                        // Fill final hole with defaultNorm
                        for (; upto < state.numDocs; upto++)
                    else if (fieldInfo.isIndexed && !fieldInfo.omitNorms)
                        // Fill entire field with default norm:
                        for (; upto < state.numDocs; upto++)

                    // Sanity check on file size: 4 bytes plus numDocs bytes per counted field
                    // (presumably the 4 is the header length - confirm against NORMS_HEADER)
                    System.Diagnostics.Debug.Assert(4 + normCount * state.numDocs == normsOut.GetFilePointer(), ".nrm file size mismatch: expected=" + (4 + normCount * state.numDocs) + " actual=" + normsOut.GetFilePointer());
Example #28
 /// <summary>Flush a new segment </summary>
 /// <param name="threadsAndFields">Per-thread consumers, each mapped to the list of its per-field consumers.</param>
 /// <param name="state">Write state of the segment being flushed.</param>
 internal abstract void  Flush(Support.Dictionary <InvertedDocConsumerPerThread, System.Collections.Generic.IList <InvertedDocConsumerPerField> > threadsAndFields, SegmentWriteState state);
Example #29
        // Remaps all buffered deletes based on a completed
        // merge
        //
        // Under lock (this), builds replacement collections for terms, docIDs and
        // queries by passing each stored number through mapper.Remap, then swaps
        // in every replacement that was built (non-null).
        // The parameters infos, docMaps, delCounts, merge and mergeDocCount are not
        // referenced in the visible lines.
        // NOTE(review): braces, the else keywords and the body of the docID loop
        // are not visible in this listing.
        internal virtual void  Remap(MergeDocIDRemapper mapper, SegmentInfos infos, int[][] docMaps, int[] delCounts, MergePolicy.OneMerge merge, int mergeDocCount)
            lock (this)
                System.Collections.Generic.IDictionary <Term, Num> newDeleteTerms;

                // Remap delete-by-term
                if (terms.Count > 0)
                    // doTermSort selects the constructor overload taking 'true'
                    // (presumably a sorted dictionary - confirm against Support.Dictionary)
                    if (doTermSort)
                        newDeleteTerms = new Support.Dictionary <Term, Num>(true);
                        newDeleteTerms = new Support.Dictionary <Term, Num>();
                    foreach (KeyValuePair <Term, Num> entry in terms)
                        Num num = entry.Value;
                        newDeleteTerms[entry.Key] = new Num(mapper.Remap(num.GetNum()));
                    // (presumably the else branch: nothing to remap)
                    newDeleteTerms = null;

                // Remap delete-by-docID
                List <int> newDeleteDocIDs;

                if (docIDs.Count > 0)
                    newDeleteDocIDs = new List <int>(docIDs.Count);
                    // The statement executed per docID is not visible in this listing
                    foreach (int num in docIDs)
                    // (presumably the else branch: nothing to remap)
                    newDeleteDocIDs = null;

                // Remap delete-by-query
                Support.Dictionary <Query, int> newDeleteQueries;

                if (queries.Count > 0)
                    newDeleteQueries = new Support.Dictionary <Query, int>(queries.Count);
                    foreach (KeyValuePair <Query, int> entry in queries)
                        int num = entry.Value;
                        newDeleteQueries[entry.Key] = mapper.Remap(num);
                    // (presumably the else branch: nothing to remap)
                    newDeleteQueries = null;

                // Install only the collections that were actually rebuilt
                if (newDeleteTerms != null)
                    terms = newDeleteTerms;
                if (newDeleteDocIDs != null)
                    docIDs = newDeleteDocIDs;
                if (newDeleteQueries != null)
                    queries = newDeleteQueries;
Example #30
        /// <summary> Init PhrasePositions in place.
        /// There is a one time initialization for this scorer:
        /// <br/>- Put in repeats[] each pp that has another pp with same position in the doc.
        /// <br/>- Also mark each such pp by pp.repeats = true.
        /// <br/>Later can consult with repeats[] in termPositionsDiffer(pp), making that check efficient.
        /// In particular, this allows to score queries with no repetitions with no overhead due to this computation.
        /// <br/>- Example 1 - query with no repetitions: "ho my"~2
        /// <br/>- Example 2 - query with repetitions: "ho my my"~2
        /// <br/>- Example 3 - query with repetitions: "my ho my"~2
        /// <br/>Init per doc w/repeats in query, includes propagating some repeating pp's to avoid false phrase detection.
        /// <br/>NOTE(review): braces and several statements (loop bodies, early/final returns) are not visible in this listing.
        /// </summary>
        /// <returns> end (max position), or -1 if any term ran out (i.e. done)
        /// </returns>
        /// <throws>  IOException  </throws>
        private int InitPhrasePositions()
            // Largest pp.position seen while building the queue
            int end = 0;

            // no repeats at all (most common case is also the simplest one)
            if (checkedRepeats && repeats == null)
                // build queue from list
                for (PhrasePositions pp = first; pp != null; pp = pp.next)
                    if (pp.position > end)
                        end = pp.position;
                    pq.Put(pp);                     // build pq from list

            // position the pp's
            // (the per-pp statement is not visible in this listing)
            for (PhrasePositions pp = first; pp != null; pp = pp.next)

            // one time initialization for this scorer
            if (!checkedRepeats)
                checkedRepeats = true;
                // check for repeats
                // m is used as a set (values are always null) and is created lazily
                Support.Dictionary <PhrasePositions, Object> m = null;
                // Compare every pp with each later pp in the list
                for (PhrasePositions pp = first; pp != null; pp = pp.next)
                    int tpPos = pp.position + pp.offset;
                    for (PhrasePositions pp2 = pp.next; pp2 != null; pp2 = pp2.next)
                        int tpPos2 = pp2.position + pp2.offset;
                        // Same position + offset: flag both and remember them
                        if (tpPos2 == tpPos)
                            if (m == null)
                                m = new Support.Dictionary <PhrasePositions, object>();
                            pp.repeats  = true;
                            pp2.repeats = true;
                            m[pp]       = null;
                            m[pp2]      = null;
                // Snapshot the collected keys into the repeats array
                if (m != null)
                    repeats = new PhrasePositions[m.Keys.Count];
                    m.Keys.CopyTo(repeats, 0);

            // with repeats must advance some repeating pp's so they all start with differing tp's
            if (repeats != null)
                for (int i = 0; i < repeats.Length; i++)
                    PhrasePositions pp = repeats[i];
                    PhrasePositions pp2;
                    // Keep advancing whichever pp TermPositionsDiffer reports until it reports none
                    while ((pp2 = TermPositionsDiffer(pp)) != null)
                        if (!pp2.NextPosition())
                            // out of pps that do not differ, advance the pp with higher offset
                            return(-1);                             // ran out of a term -- done

            // build queue from list
            for (PhrasePositions pp = first; pp != null; pp = pp.next)
                if (pp.position > end)
                    end = pp.position;
                pq.Put(pp);                 // build pq from list

            // Scratch array sized to the queue, only allocated when there are repeats
            if (repeats != null)
                tmpPos = new PhrasePositions[pq.Size()];
			/// <summary>
			/// Returns the cached value for <paramref name="key"/> under the given reader,
			/// creating it through <c>CreateValue</c> when absent. A CreationPlaceholder is
			/// stored first and locked while the real value is created; the finished value
			/// then replaces the placeholder in the inner cache.
			/// NOTE(review): braces and the <c>else</c> keywords are not visible in this
			/// listing; the branch structure below is inferred from statement order.
			/// </summary>
			/// <param name="reader">Reader whose GetFieldCacheKey() selects the inner cache.</param>
			/// <param name="key">Entry identifying the value within the inner cache.</param>
			/// <returns>The cached or newly created value.</returns>
			public virtual System.Object Get(IndexReader reader, Entry key)
                IDictionary<Entry, Object> innerCache;
				System.Object value_Renamed;
				System.Object readerKey = reader.GetFieldCacheKey();
				// First phase: look up (or create) the per-reader cache and the entry under the readerCache lock
				lock (readerCache)
					// NOTE(review): the null test assumes the indexer yields null for a missing key - confirm
					innerCache = readerCache[readerKey];
					if (innerCache == null)
                        innerCache = new Support.Dictionary<Entry, Object>();
						readerCache[readerKey] = innerCache;
						value_Renamed = null;
						// (presumably the else branch: an inner cache already exists)
						value_Renamed = innerCache[key];
					// Nothing cached yet: publish a placeholder for this key
					if (value_Renamed == null)
						value_Renamed = new CreationPlaceholder();
						innerCache[key] = value_Renamed;
				// Second phase: resolve the placeholder while holding a lock on the placeholder itself
				if (value_Renamed is CreationPlaceholder)
					lock (value_Renamed)
						CreationPlaceholder progress = (CreationPlaceholder) value_Renamed;
						// Create the value only if the placeholder has not been filled in yet
						if (progress.value_Renamed == null)
							progress.value_Renamed = CreateValue(reader, key);
							// Replace the placeholder in the cache with the real value
							lock (readerCache)
								innerCache[key] = progress.value_Renamed;
							// Only check if key.custom (the parser) is
							// non-null; else, we check twice for a single
							// call to FieldCache.getXXX
							if (key.custom != null && wrapper != null)
								System.IO.StreamWriter infoStream = wrapper.GetInfoStream();
								if (infoStream != null)
									PrintNewInsanity(infoStream, progress.value_Renamed);
						return progress.value_Renamed;
				// Value was already present and is not a placeholder
				return value_Renamed;