Example #1
		protected internal AllTermDocs(SegmentReader parent) : base(parent.MaxDoc)
		{
			lock (parent)
			{
				this.deletedDocs = parent.deletedDocs;
			}
		}
Example #2
		public /*protected internal*/ SegmentTermDocs(SegmentReader parent)
		{
			this.parent = parent;
			this.freqStream = (IndexInput) parent.core.freqStream.Clone();
			lock (parent)
			{
				this.deletedDocs = parent.deletedDocs;
			}
			this.skipInterval = parent.core.GetTermsReader().SkipInterval;
			this.maxSkipLevels = parent.core.GetTermsReader().MaxSkipLevels;
		}
Example #3
		/// <summary> Test the term index.</summary>
		private Status.TermIndexStatus TestTermIndex(SegmentInfo info, SegmentReader reader)
		{
			var status = new Status.TermIndexStatus();
			
			try
			{
				if (infoStream != null)
				{
					infoStream.Write("    test: terms, freq, prox...");
				}
				
				TermEnum termEnum = reader.Terms();
				TermPositions termPositions = reader.TermPositions();
				
				// Used only to count up # deleted docs for this term
				var myTermDocs = new MySegmentTermDocs(reader);
				
				int maxDoc = reader.MaxDoc;
				
				while (termEnum.Next())
				{
					status.termCount++;
					Term term = termEnum.Term;
					int docFreq = termEnum.DocFreq();
					termPositions.Seek(term);
					int lastDoc = - 1;
					int freq0 = 0;
					status.totFreq += docFreq;
					while (termPositions.Next())
					{
						freq0++;
						int doc = termPositions.Doc;
						int freq = termPositions.Freq;
						if (doc <= lastDoc)
						{
							throw new System.SystemException("term " + term + ": doc " + doc + " <= lastDoc " + lastDoc);
						}
						if (doc >= maxDoc)
						{
							throw new System.SystemException("term " + term + ": doc " + doc + " >= maxDoc " + maxDoc);
						}
						
						lastDoc = doc;
						if (freq <= 0)
						{
							throw new System.SystemException("term " + term + ": doc " + doc + ": freq " + freq + " is out of bounds");
						}
						
						int lastPos = - 1;
						status.totPos += freq;
						for (int j = 0; j < freq; j++)
						{
							int pos = termPositions.NextPosition();
							if (pos < - 1)
							{
								throw new System.SystemException("term " + term + ": doc " + doc + ": pos " + pos + " is out of bounds");
							}
							if (pos < lastPos)
							{
								throw new System.SystemException("term " + term + ": doc " + doc + ": pos " + pos + " < lastPos " + lastPos);
							}
						    lastPos = pos;
						}
					}
					
					// Now count how many deleted docs occurred in
					// this term:
					int delCount;
					if (reader.HasDeletions)
					{
						myTermDocs.Seek(term);
						while (myTermDocs.Next())
						{
						}
						delCount = myTermDocs.delCount;
					}
					else
					{
						delCount = 0;
					}
					
					if (freq0 + delCount != docFreq)
					{
						throw new System.SystemException("term " + term + " docFreq=" + docFreq + " != num docs seen " + freq0 + " + num docs deleted " + delCount);
					}
				}
				
				Msg("OK [" + status.termCount + " terms; " + status.totFreq + " terms/docs pairs; " + status.totPos + " tokens]");
			}
			catch (System.Exception e)
			{
				Msg("ERROR [" + System.Convert.ToString(e.Message) + "]");
				status.error = e;
				if (infoStream != null)
				{
					infoStream.WriteLine(e.StackTrace);
				}
			}
			
			return status;
		}
Example #4
		/// <summary> Test field norms.</summary>
        private Status.FieldNormStatus TestFieldNorms(IEnumerable<string> fieldNames, SegmentReader reader)
		{
			var status = new Status.FieldNormStatus();
			
			try
			{
				// Test Field Norms
				if (infoStream != null)
				{
					infoStream.Write("    test: field norms.........");
				}

				var b = new byte[reader.MaxDoc];
				foreach(string fieldName in fieldNames)
				{
                    if (reader.HasNorms(fieldName))
                    {
                        reader.Norms(fieldName, b, 0);
                        ++status.totFields;
                    }
				}
				
				Msg("OK [" + status.totFields + " fields]");
			}
			catch (System.Exception e)
			{
				Msg("ERROR [" + System.Convert.ToString(e.Message) + "]");
				status.error = e;
				if (infoStream != null)
				{
					infoStream.WriteLine(e.StackTrace);
				}
			}
			
			return status;
		}
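A minimal usage sketch of the norms API exercised above; the field name "body" and the reader variable are assumptions for illustration, not part of the original example:

		var norms = new byte[reader.MaxDoc];     // one encoded norm byte per document
		if (reader.HasNorms("body"))
		{
			reader.Norms("body", norms, 0);      // fill the buffer starting at offset 0
			// norms[docId] now holds the encoded norm for that document in this reader
		}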
Example #5
			internal MySegmentTermDocs(SegmentReader p):base(p)
			{
			}
Example #6
		/// <summary>Carefully merges deletes for the segments we just
		/// merged.  This is tricky because, although merging will
		/// clear all deletes (compacts the documents), new
		/// deletes may have been flushed to the segments since
		/// the merge was started.  This method "carries over"
		/// such new deletes onto the newly merged segment, and
		/// saves the resulting deletes file (incrementing the
		/// delete generation for merge.info).  If no deletes were
		/// flushed, no new deletes file is saved. 
		/// </summary>
		private void  CommitMergedDeletes(MergePolicy.OneMerge merge, SegmentReader mergeReader)
		{
			lock (this)
			{
				
				System.Diagnostics.Debug.Assert(TestPoint("startCommitMergeDeletes"));
				
				SegmentInfos sourceSegments = merge.segments;
				
				if (infoStream != null)
					Message("commitMergeDeletes " + merge.SegString(directory));
				
				// Carefully merge deletes that occurred after we
				// started merging:
				int docUpto = 0;
				int delCount = 0;
				
				for (int i = 0; i < sourceSegments.Count; i++)
				{
					SegmentInfo info = sourceSegments.Info(i);
					int docCount = info.docCount;
					SegmentReader previousReader = merge.readersClone[i];
					SegmentReader currentReader = merge.readers[i];
					if (previousReader.HasDeletions)
					{
						
						// There were deletes on this segment when the merge
						// started.  The merge has collapsed away those
						// deletes, but, if new deletes were flushed since
						// the merge started, we must now carefully keep any
						// newly flushed deletes but mapping them to the new
						// docIDs.
						
						if (currentReader.NumDeletedDocs > previousReader.NumDeletedDocs)
						{
							// This means this segment has had new deletes
							// committed since we started the merge, so we
							// must merge them:
							for (int j = 0; j < docCount; j++)
							{
								if (previousReader.IsDeleted(j))
								{
									System.Diagnostics.Debug.Assert(currentReader.IsDeleted(j));
                                }
								else
								{
									if (currentReader.IsDeleted(j))
									{
										mergeReader.DoDelete(docUpto);
										delCount++;
									}
									docUpto++;
								}
							}
						}
						else
						{
							docUpto += docCount - previousReader.NumDeletedDocs;
						}
					}
					else if (currentReader.HasDeletions)
					{
						// This segment had no deletes before but now it
						// does:
						for (int j = 0; j < docCount; j++)
						{
							if (currentReader.IsDeleted(j))
							{
								mergeReader.DoDelete(docUpto);
								delCount++;
							}
							docUpto++;
						}
					}
					// No deletes before or after
					else
						docUpto += info.docCount;
				}
				
				System.Diagnostics.Debug.Assert(mergeReader.NumDeletedDocs == delCount);
				
				mergeReader.hasChanges = delCount > 0;
			}
		}
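A self-contained sketch of the "carry over" idea described in the summary above -- not the IndexWriter code itself, just the docUpto bookkeeping in isolation, with plain bool arrays standing in for the readers' deleted-docs bit sets:

		static class CarryOverDeletesSketch
		{
			// deletedAtMergeStart[j]: doc j was already deleted when the merge began
			//                         (the merge has collapsed those docs away).
			// deletedNow[j]:          doc j is deleted in the segment's current reader.
			// Returns the doc IDs to delete in the newly merged (compacted) segment.
			public static System.Collections.Generic.List<int> CarryOverDeletes(bool[] deletedAtMergeStart, bool[] deletedNow)
			{
				var toDelete = new System.Collections.Generic.List<int>();
				int mergedDocId = 0;                 // doc IDs compact across the merge
				for (int j = 0; j < deletedAtMergeStart.Length; j++)
				{
					if (deletedAtMergeStart[j])
						continue;                    // already collapsed away by the merge
					if (deletedNow[j])
						toDelete.Add(mergedDocId);   // newly flushed delete: carry it over
					mergedDocId++;                   // only surviving docs advance the merged ID
				}
				return toDelete;
			}
		}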
Example #7
			/// <summary> Release the segment reader (i.e. decRef it and close if there
			/// are no more references).
			/// </summary>
			/// <param name="sr">
			/// </param>
			/// <throws>  IOException </throws>
			public virtual void  Release(SegmentReader sr)
			{
				lock (this)
				{
					Release(sr, false);
				}
			}
Example #8
		internal SegmentTermPositions(SegmentReader p):base(p)
		{
			this.proxStream = null; // the proxStream will be cloned lazily when nextPosition() is called for the first time
		}
Example #9
        public override int Merge(MergeState mergeState)
        {
            int docCount = 0;
            int idx      = 0;

            foreach (AtomicReader reader in mergeState.Readers)
            {
                SegmentReader matchingSegmentReader = mergeState.MatchingSegmentReaders[idx++];
                CompressingStoredFieldsReader matchingFieldsReader = null;
                if (matchingSegmentReader != null)
                {
                    StoredFieldsReader fieldsReader = matchingSegmentReader.FieldsReader;
                    // we can only bulk-copy if the matching reader is also a CompressingStoredFieldsReader
                    if (fieldsReader != null && fieldsReader is CompressingStoredFieldsReader)
                    {
                        matchingFieldsReader = (CompressingStoredFieldsReader)fieldsReader;
                    }
                }

                int   maxDoc   = reader.MaxDoc;
                IBits liveDocs = reader.LiveDocs;

                if (matchingFieldsReader == null
                    || matchingFieldsReader.Version != VERSION_CURRENT          // reader version is not the same as the writer version
                    || matchingFieldsReader.CompressionMode != compressionMode
                    || matchingFieldsReader.ChunkSize != chunkSize)              // the way data is decompressed depends on the chunk size
                {
                    // naive merge...
                    for (int i = NextLiveDoc(0, liveDocs, maxDoc); i < maxDoc; i = NextLiveDoc(i + 1, liveDocs, maxDoc))
                    {
                        Document doc = reader.Document(i);
                        AddDocument(doc, mergeState.FieldInfos);
                        ++docCount;
                        mergeState.CheckAbort.Work(300);
                    }
                }
                else
                {
                    int docID = NextLiveDoc(0, liveDocs, maxDoc);
                    if (docID < maxDoc)
                    {
                        // not all docs were deleted
                        CompressingStoredFieldsReader.ChunkIterator it = matchingFieldsReader.GetChunkIterator(docID);
                        int[] startOffsets = new int[0];
                        do
                        {
                            // go to the next chunk that contains docID
                            it.Next(docID);
                            // transform lengths into offsets
                            if (startOffsets.Length < it.chunkDocs)
                            {
                                startOffsets = new int[ArrayUtil.Oversize(it.chunkDocs, 4)];
                            }
                            for (int i = 1; i < it.chunkDocs; ++i)
                            {
                                startOffsets[i] = startOffsets[i - 1] + it.lengths[i - 1];
                            }

                            if (numBufferedDocs == 0                                                                             // starting a new chunk
                                && startOffsets[it.chunkDocs - 1] < chunkSize                                                    // chunk is small enough
                                && startOffsets[it.chunkDocs - 1] + it.lengths[it.chunkDocs - 1] >= chunkSize                    // chunk is large enough
                                && NextDeletedDoc(it.docBase, liveDocs, it.docBase + it.chunkDocs) == it.docBase + it.chunkDocs) // no deletion in the chunk
                            {
                                Debug.Assert(docID == it.docBase);

                                // no need to decompress, just copy data
                                indexWriter.WriteIndex(it.chunkDocs, fieldsStream.GetFilePointer());
                                WriteHeader(this.docBase, it.chunkDocs, it.numStoredFields, it.lengths);
                                it.CopyCompressedData(fieldsStream);
                                this.docBase += it.chunkDocs;
                                docID         = NextLiveDoc(it.docBase + it.chunkDocs, liveDocs, maxDoc);
                                docCount     += it.chunkDocs;
                                mergeState.CheckAbort.Work(300 * it.chunkDocs);
                            }
                            else
                            {
                                // decompress
                                it.Decompress();
                                if (startOffsets[it.chunkDocs - 1] + it.lengths[it.chunkDocs - 1] != it.bytes.Length)
                                {
                                    throw new CorruptIndexException("Corrupted: expected chunk size=" + (startOffsets[it.chunkDocs - 1] + it.lengths[it.chunkDocs - 1]) + ", got " + it.bytes.Length);
                                }
                                // copy non-deleted docs
                                for (; docID < it.docBase + it.chunkDocs; docID = NextLiveDoc(docID + 1, liveDocs, maxDoc))
                                {
                                    int diff = docID - it.docBase;
                                    StartDocument(it.numStoredFields[diff]);
                                    bufferedDocs.WriteBytes(it.bytes.Bytes, it.bytes.Offset + startOffsets[diff], it.lengths[diff]);
                                    FinishDocument();
                                    ++docCount;
                                    mergeState.CheckAbort.Work(300);
                                }
                            }
                        } while (docID < maxDoc);

                        it.CheckIntegrity();
                    }
                }
            }
            Finish(mergeState.FieldInfos, docCount);
            return(docCount);
        }
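The merge loop above calls NextLiveDoc/NextDeletedDoc helpers that are not shown in this snippet; a plausible sketch of them, assuming IBits exposes Get(int) and that a null liveDocs means the segment has no deletions:

        private static int NextLiveDoc(int doc, IBits liveDocs, int maxDoc)
        {
            if (liveDocs == null)
                return doc;                          // no deletions: every doc is live
            while (doc < maxDoc && !liveDocs.Get(doc))
                ++doc;
            return doc;
        }

        private static int NextDeletedDoc(int doc, IBits liveDocs, int maxDoc)
        {
            if (liveDocs == null)
                return maxDoc;                       // no deletions at all
            while (doc < maxDoc && liveDocs.Get(doc))
                ++doc;
            return doc;
        }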
Example #10
 /// <summary>This constructor is only used for <see cref="Reopen()" /> </summary>
 internal DirectoryReader(Directory directory, SegmentInfos infos, SegmentReader[] oldReaders, int[] oldStarts,
                          IEnumerable<KeyValuePair<string, byte[]>> oldNormsCache, bool readOnly, bool doClone, int termInfosIndexDivisor)
 {
     this.internalDirectory = directory;
     this.readOnly = readOnly;
     this.segmentInfos = infos;
     this.termInfosIndexDivisor = termInfosIndexDivisor;
     if (!readOnly)
     {
         // We assume that this segments_N was previously
         // properly sync'd:
         synced.UnionWith(infos.Files(directory, true));
     }
     
     // we put the old SegmentReaders in a map, that allows us
     // to lookup a reader using its segment name
     IDictionary<string, int> segmentReaders = new HashMap<string, int>();
     
     if (oldReaders != null)
     {
         // create a Map SegmentName->SegmentReader
         for (int i = 0; i < oldReaders.Length; i++)
         {
             segmentReaders[oldReaders[i].SegmentName] = i;
         }
     }
     
     var newReaders = new SegmentReader[infos.Count];
     
     // remember which readers are shared between the old and the re-opened
     // DirectoryReader - we have to incRef those readers
     var readerShared = new bool[infos.Count];
     
     for (int i = infos.Count - 1; i >= 0; i--)
     {
         // find SegmentReader for this segment
         if (!segmentReaders.ContainsKey(infos.Info(i).name))
         {
             // this is a new segment, no old SegmentReader can be reused
             newReaders[i] = null;
         }
         else
         {
             // there is an old reader for this segment - we'll try to reopen it
             newReaders[i] = oldReaders[segmentReaders[infos.Info(i).name]];
         }
         
         bool success = false;
         try
         {
             SegmentReader newReader;
             if (newReaders[i] == null || infos.Info(i).GetUseCompoundFile() != newReaders[i].SegmentInfo.GetUseCompoundFile())
             {
                 
                 // We should never see a totally new segment during cloning
                 System.Diagnostics.Debug.Assert(!doClone);
                 
                 // this is a new reader; in case we hit an exception we can close it safely
                 newReader = SegmentReader.Get(readOnly, infos.Info(i), termInfosIndexDivisor);
             }
             else
             {
                 newReader = newReaders[i].ReopenSegment(infos.Info(i), doClone, readOnly);
             }
             if (newReader == newReaders[i])
             {
                 // this reader will be shared between the old and the new one,
                 // so we must incRef it
                 readerShared[i] = true;
                 newReader.IncRef();
             }
             else
             {
                 readerShared[i] = false;
                 newReaders[i] = newReader;
             }
             success = true;
         }
         finally
         {
             if (!success)
             {
                 for (i++; i < infos.Count; i++)
                 {
                     if (newReaders[i] != null)
                     {
                         try
                         {
                             if (!readerShared[i])
                             {
                                 // this is a new subReader that is not used by the old one,
                                 // we can close it
                                 newReaders[i].Close();
                             }
                             else
                             {
                                 // this subReader is also used by the old reader, so instead
                                 // closing we must decRef it
                                 newReaders[i].DecRef();
                             }
                         }
                         catch (System.IO.IOException)
                         {
                             // keep going - we want to clean up as much as possible
                         }
                     }
                 }
             }
         }
     }
     
     // initialize the readers to calculate maxDoc before we try to reuse the old normsCache
     Initialize(newReaders);
     
     // try to copy unchanged norms from the old normsCache to the new one
     if (oldNormsCache != null)
     {
         foreach(var entry in oldNormsCache)
         {
             String field = entry.Key;
             if (!HasNorms(field))
             {
                 continue;
             }
             
             byte[] oldBytes = entry.Value;
             
             var bytes = new byte[MaxDoc];
             
             for (int i = 0; i < subReaders.Length; i++)
             {
                 int oldReaderIndex = segmentReaders[subReaders[i].SegmentName];
                 
                 // this SegmentReader was not re-opened, we can copy all of its norms 
                 if (segmentReaders.ContainsKey(subReaders[i].SegmentName) &&
                      (oldReaders[oldReaderIndex] == subReaders[i]
                        || oldReaders[oldReaderIndex].norms[field] == subReaders[i].norms[field]))
                 {
                     // we don't have to synchronize here: either this constructor is called from a SegmentReader,
                     // in which case no old norms cache is present, or it is called from MultiReader.reopen(),
                     // which is synchronized
                     Array.Copy(oldBytes, oldStarts[oldReaderIndex], bytes, starts[i], starts[i + 1] - starts[i]);
                 }
                 else
                 {
                     subReaders[i].Norms(field, bytes, starts[i]);
                 }
             }
             
             normsCache[field] = bytes; // update cache
         }
     }
 }
Example #11
        private void  Initialize(SegmentReader[] subReaders)
        {
            this.subReaders = subReaders;
            starts = new int[subReaders.Length + 1]; // build starts array
            for (int i = 0; i < subReaders.Length; i++)
            {
                starts[i] = maxDoc;
                maxDoc += subReaders[i].MaxDoc; // compute maxDocs
                
                if (subReaders[i].HasDeletions)
                    hasDeletions = true;
            }
            starts[subReaders.Length] = maxDoc;

            if (!readOnly)
            {
                maxIndexVersion = SegmentInfos.ReadCurrentVersion(internalDirectory);
            }
        }
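The starts array built here is what lets the composite reader translate a global doc ID into a sub-reader plus local doc ID; a hypothetical helper, assuming the same subReaders/starts fields that Initialize fills in:

        private int SubReaderIndex(int globalDocId)
        {
            // linear scan for clarity; a binary search over starts gives the same answer
            for (int i = subReaders.Length - 1; i >= 0; i--)
            {
                if (globalDocId >= starts[i])
                    return i;                        // local doc ID is globalDocId - starts[i]
            }
            throw new System.ArgumentOutOfRangeException("globalDocId");
        }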
Example #12
 // Used by near real-time search
 internal DirectoryReader(IndexWriter writer, SegmentInfos infos, int termInfosIndexDivisor)
 {
     this.internalDirectory = writer.Directory;
     this.readOnly = true;
     segmentInfos = infos;
     segmentInfosStart = (SegmentInfos) infos.Clone();
     this.termInfosIndexDivisor = termInfosIndexDivisor;
     if (!readOnly)
     {
         // We assume that this segments_N was previously
         // properly sync'd:
         synced.UnionWith(infos.Files(internalDirectory, true));
     }
     
     // IndexWriter synchronizes externally before calling
     // us, which ensures infos will not change; so there's
     // no need to process segments in reverse order
     int numSegments = infos.Count;
     var readers = new SegmentReader[numSegments];
     Directory dir = writer.Directory;
     int upto = 0;
     
     for (int i = 0; i < numSegments; i++)
     {
         bool success = false;
         try
         {
             SegmentInfo info = infos.Info(i);
             if (info.dir == dir)
             {
                 readers[upto++] = writer.readerPool.GetReadOnlyClone(info, true, termInfosIndexDivisor);
             }
             success = true;
         }
         finally
         {
             if (!success)
             {
                 // Close all readers we had opened:
                 for (upto--; upto >= 0; upto--)
                 {
                     try
                     {
                         readers[upto].Close();
                     }
                     catch (System.Exception)
                     {
                         // keep going - we want to clean up as much as possible
                     }
                 }
             }
         }
     }
     
     this.writer = writer;
     
     if (upto < readers.Length)
     {
         // This means some segments were in a foreign Directory
         var newReaders = new SegmentReader[upto];
         Array.Copy(readers, 0, newReaders, 0, upto);
         readers = newReaders;
     }
     
     Initialize(readers);
 }
Example #13
 /// <summary>Construct reading the named set of readers. </summary>
 internal DirectoryReader(Directory directory, SegmentInfos sis, IndexDeletionPolicy deletionPolicy, bool readOnly, int termInfosIndexDivisor)
 {
     internalDirectory = directory;
     this.readOnly = readOnly;
     this.segmentInfos = sis;
     this.deletionPolicy = deletionPolicy;
     this.termInfosIndexDivisor = termInfosIndexDivisor;
     
     if (!readOnly)
     {
         // We assume that this segments_N was previously
         // properly sync'd:
         synced.UnionWith(sis.Files(directory, true));
     }
     
     // To reduce the chance of hitting FileNotFound
     // (and having to retry), we open segments in
     // reverse because IndexWriter merges & deletes
     // the newest segments first.
     
     var readers = new SegmentReader[sis.Count];
     for (int i = sis.Count - 1; i >= 0; i--)
     {
         bool success = false;
         try
         {
             readers[i] = SegmentReader.Get(readOnly, sis.Info(i), termInfosIndexDivisor);
             success = true;
         }
         finally
         {
             if (!success)
             {
                 // Close all readers we had opened:
                 for (i++; i < sis.Count; i++)
                 {
                     try
                     {
                         readers[i].Close();
                     }
                     catch (System.Exception)
                     {
                         // keep going - we want to clean up as much as possible
                     }
                 }
             }
         }
     }
     
     Initialize(readers);
 }
Example #14
        /// <summary> </summary>
        /// <returns> The number of documents in all of the readers
        /// </returns>
        /// <throws>  CorruptIndexException if the index is corrupt </throws>
        /// <throws>  IOException if there is a low-level IO error </throws>
        private int MergeFields()
        {
            if (!mergeDocStores)
            {
                // When we are not merging by doc stores, that means
                // all segments were written as part of a single
                // autoCommit=false IndexWriter session, so their field
                // name -> number mapping are the same.  So, we start
                // with the fieldInfos of the last segment in this
                // case, to keep that numbering.
                SegmentReader sr = (SegmentReader)readers[readers.Count - 1];
                fieldInfos = (FieldInfos)sr.core.fieldInfos.Clone();
            }
            else
            {
                fieldInfos = new FieldInfos();                 // merge field names
            }

            for (System.Collections.IEnumerator iter = readers.GetEnumerator(); iter.MoveNext();)
            {
                IndexReader reader = (IndexReader)iter.Current;
                if (reader is SegmentReader)
                {
                    SegmentReader segmentReader       = (SegmentReader)reader;
                    FieldInfos    readerFieldInfos    = segmentReader.FieldInfos();
                    int           numReaderFieldInfos = readerFieldInfos.Size();
                    for (int j = 0; j < numReaderFieldInfos; j++)
                    {
                        FieldInfo fi = readerFieldInfos.FieldInfo(j);
                        fieldInfos.Add(fi.name, fi.isIndexed, fi.storeTermVector, fi.storePositionWithTermVector, fi.storeOffsetWithTermVector, !reader.HasNorms(fi.name), fi.storePayloads, fi.omitTermFreqAndPositions);
                    }
                }
                else
                {
                    AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.TERMVECTOR_WITH_POSITION_OFFSET), true, true, true, false, false);
                    AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.TERMVECTOR_WITH_POSITION), true, true, false, false, false);
                    AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.TERMVECTOR_WITH_OFFSET), true, false, true, false, false);
                    AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.TERMVECTOR), true, false, false, false, false);
                    AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.OMIT_TERM_FREQ_AND_POSITIONS), false, false, false, false, true);
                    AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.STORES_PAYLOADS), false, false, false, true, false);
                    AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.INDEXED), false, false, false, false, false);
                    fieldInfos.Add(reader.GetFieldNames(FieldOption.UNINDEXED), false);
                }
            }
            fieldInfos.Write(directory, segment + ".fnm");

            int docCount = 0;

            SetMatchingSegmentReaders();

            if (mergeDocStores)
            {
                // for merging we don't want to compress/uncompress the data, so to tell the FieldsReader that we're
                // in  merge mode, we use this FieldSelector
                FieldSelector fieldSelectorMerge = new AnonymousClassFieldSelector(this);

                // merge field values
                FieldsWriter fieldsWriter = new FieldsWriter(directory, segment, fieldInfos);

                try
                {
                    int idx = 0;
                    for (System.Collections.IEnumerator iter = readers.GetEnumerator(); iter.MoveNext();)
                    {
                        IndexReader   reader = (IndexReader)iter.Current;
                        SegmentReader matchingSegmentReader = matchingSegmentReaders[idx++];
                        FieldsReader  matchingFieldsReader  = null;
                        if (matchingSegmentReader != null)
                        {
                            FieldsReader fieldsReader = matchingSegmentReader.GetFieldsReader();
                            if (fieldsReader != null && fieldsReader.CanReadRawDocs())
                            {
                                matchingFieldsReader = fieldsReader;
                            }
                        }
                        if (reader.HasDeletions())
                        {
                            docCount += CopyFieldsWithDeletions(fieldSelectorMerge, fieldsWriter, reader, matchingFieldsReader);
                        }
                        else
                        {
                            docCount += CopyFieldsNoDeletions(fieldSelectorMerge, fieldsWriter, reader, matchingFieldsReader);
                        }
                    }
                }
                finally
                {
                    fieldsWriter.Close();
                }

                System.String fileName      = segment + "." + IndexFileNames.FIELDS_INDEX_EXTENSION;
                long          fdxFileLength = directory.FileLength(fileName);

                if (4 + ((long)docCount) * 8 != fdxFileLength)
                {
                    // This is most likely a bug in Sun JRE 1.6.0_04/_05;
                    // we detect that the bug has struck, here, and
                    // throw an exception to prevent the corruption from
                    // entering the index.  See LUCENE-1282 for
                    // details.
                    throw new System.SystemException("mergeFields produced an invalid result: docCount is " + docCount + " but fdx file size is " + fdxFileLength + " file=" + fileName + " file exists?=" + directory.FileExists(fileName) + "; now aborting this merge to prevent index corruption");
                }
            }
            // If we are skipping the doc stores, that means there
            // are no deletions in any of these segments, so we
            // just sum numDocs() of each segment to get total docCount
            else
            {
                for (System.Collections.IEnumerator iter = readers.GetEnumerator(); iter.MoveNext();)
                {
                    docCount += ((IndexReader)iter.Current).NumDocs();
                }
            }

            return(docCount);
        }
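The final sanity check assumes an .fdx (fields index) layout of a 4-byte header followed by one 8-byte pointer per stored document, so for example 1,000 documents should give an 8,004-byte file; restated as a hypothetical helper:

            private static bool FdxLengthLooksValid(int docCount, long fdxFileLength)
            {
                // 4-byte header + one 8-byte (long) pointer per stored document
                return 4 + (long)docCount * 8 == fdxFileLength;
            }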
Example #15
		/// <summary> Test stored fields for a segment.</summary>
		private Status.StoredFieldStatus TestStoredFields(SegmentInfo info, SegmentReader reader, System.Globalization.NumberFormatInfo format)
		{
			var status = new Status.StoredFieldStatus();
			
			try
			{
				if (infoStream != null)
				{
					infoStream.Write("    test: stored fields.......");
				}
				
				// Scan stored fields for all documents
				for (int j = 0; j < info.docCount; ++j)
				{
					if (!reader.IsDeleted(j))
					{
						status.docCount++;
						Document.Document doc = reader.Document(j);
						status.totFields += doc.GetFields().Count;
					}
				}
				
				// Validate docCount
				if (status.docCount != reader.NumDocs())
				{
					throw new System.SystemException("docCount=" + reader.NumDocs() + " but saw " + status.docCount + " undeleted docs");
				}
				
                Msg(string.Format(format, "OK [{0:d} total field count; avg {1:f} fields per doc]", new object[] { status.totFields, (((float) status.totFields) / status.docCount) }));
            }
			catch (System.Exception e)
			{
				Msg("ERROR [" + System.Convert.ToString(e.Message) + "]");
				status.error = e;
				if (infoStream != null)
				{
					infoStream.WriteLine(e.StackTrace);
				}
			}
			
			return status;
		}
Example #16
        public override int Merge(MergeState mergeState)
        {
            int docCount = 0;
            int idx      = 0;

            foreach (AtomicReader reader in mergeState.Readers)
            {
                SegmentReader matchingSegmentReader = mergeState.MatchingSegmentReaders[idx++];
                CompressingTermVectorsReader matchingVectorsReader = null;
                if (matchingSegmentReader != null)
                {
                    TermVectorsReader vectorsReader = matchingSegmentReader.TermVectorsReader;
                    // we can only bulk-copy if the matching reader is also a CompressingTermVectorsReader
                    if (vectorsReader != null && vectorsReader is CompressingTermVectorsReader compressingTermVectorsReader)
                    {
                        matchingVectorsReader = compressingTermVectorsReader;
                    }
                }

                int   maxDoc   = reader.MaxDoc;
                IBits liveDocs = reader.LiveDocs;

                if (matchingVectorsReader == null || matchingVectorsReader.Version != VERSION_CURRENT || matchingVectorsReader.CompressionMode != compressionMode || matchingVectorsReader.ChunkSize != chunkSize || matchingVectorsReader.PackedInt32sVersion != PackedInt32s.VERSION_CURRENT)
                {
                    // naive merge...
                    for (int i = NextLiveDoc(0, liveDocs, maxDoc); i < maxDoc; i = NextLiveDoc(i + 1, liveDocs, maxDoc))
                    {
                        Fields vectors = reader.GetTermVectors(i);
                        AddAllDocVectors(vectors, mergeState);
                        ++docCount;
                        mergeState.CheckAbort.Work(300);
                    }
                }
                else
                {
                    CompressingStoredFieldsIndexReader index = matchingVectorsReader.Index;
                    IndexInput vectorsStreamOrig             = matchingVectorsReader.VectorsStream;
                    vectorsStreamOrig.Seek(0);
                    ChecksumIndexInput vectorsStream = new BufferedChecksumIndexInput((IndexInput)vectorsStreamOrig.Clone());

                    for (int i = NextLiveDoc(0, liveDocs, maxDoc); i < maxDoc;)
                    {
                        // We make sure to move the checksum input in any case, otherwise the final
                        // integrity check might need to read the whole file a second time
                        long startPointer = index.GetStartPointer(i);
                        if (startPointer > vectorsStream.GetFilePointer())
                        {
                            vectorsStream.Seek(startPointer);
                        }
                        if ((pendingDocs.Count == 0) && (i == 0 || index.GetStartPointer(i - 1) < startPointer)) // start of a chunk
                        {
                            int docBase   = vectorsStream.ReadVInt32();
                            int chunkDocs = vectorsStream.ReadVInt32();
                            if (Debugging.AssertsEnabled)
                            {
                                Debugging.Assert(docBase + chunkDocs <= matchingSegmentReader.MaxDoc);
                            }
                            if (docBase + chunkDocs < matchingSegmentReader.MaxDoc && NextDeletedDoc(docBase, liveDocs, docBase + chunkDocs) == docBase + chunkDocs)
                            {
                                long chunkEnd    = index.GetStartPointer(docBase + chunkDocs);
                                long chunkLength = chunkEnd - vectorsStream.GetFilePointer();
                                indexWriter.WriteIndex(chunkDocs, this.vectorsStream.GetFilePointer());
                                this.vectorsStream.WriteVInt32(docCount);
                                this.vectorsStream.WriteVInt32(chunkDocs);
                                this.vectorsStream.CopyBytes(vectorsStream, chunkLength);
                                docCount     += chunkDocs;
                                this.numDocs += chunkDocs;
                                mergeState.CheckAbort.Work(300 * chunkDocs);
                                i = NextLiveDoc(docBase + chunkDocs, liveDocs, maxDoc);
                            }
                            else
                            {
                                for (; i < docBase + chunkDocs; i = NextLiveDoc(i + 1, liveDocs, maxDoc))
                                {
                                    Fields vectors = reader.GetTermVectors(i);
                                    AddAllDocVectors(vectors, mergeState);
                                    ++docCount;
                                    mergeState.CheckAbort.Work(300);
                                }
                            }
                        }
                        else
                        {
                            Fields vectors = reader.GetTermVectors(i);
                            AddAllDocVectors(vectors, mergeState);
                            ++docCount;
                            mergeState.CheckAbort.Work(300);
                            i = NextLiveDoc(i + 1, liveDocs, maxDoc);
                        }
                    }

                    vectorsStream.Seek(vectorsStream.Length - CodecUtil.FooterLength());
                    CodecUtil.CheckFooter(vectorsStream);
                }
            }
            Finish(mergeState.FieldInfos, docCount);
            return(docCount);
        }
Example #17
		/// <summary> Test term vectors for a segment.</summary>
        private Status.TermVectorStatus TestTermVectors(SegmentInfo info, SegmentReader reader, System.Globalization.NumberFormatInfo format)
		{
			var status = new Status.TermVectorStatus();
			
			try
			{
				if (infoStream != null)
				{
					infoStream.Write("    test: term vectors........");
				}
				
				for (int j = 0; j < info.docCount; ++j)
				{
					if (!reader.IsDeleted(j))
					{
						status.docCount++;
						ITermFreqVector[] tfv = reader.GetTermFreqVectors(j);
						if (tfv != null)
						{
							status.totVectors += tfv.Length;
						}
					}
				}
				
                Msg(System.String.Format(format, "OK [{0:d} total vector count; avg {1:f} term/freq vector fields per doc]", new object[] { status.totVectors, (((float) status.totVectors) / status.docCount) }));
            }
			catch (System.Exception e)
			{
				Msg("ERROR [" + System.Convert.ToString(e.Message) + "]");
				status.error = e;
				if (infoStream != null)
				{
					infoStream.WriteLine(e.StackTrace);
				}
			}
			
			return status;
		}
Example #18
 public override void Run()
 {
     if (VERBOSE)
     {
         Console.WriteLine(Thread.CurrentThread.Name + ": launch search thread");
     }
     while (Environment.TickCount < stopTimeMS)
     {
         try
         {
             IndexSearcher s = outerInstance.GetCurrentSearcher();
             try
             {
                 // Verify 1) IW is correctly setting
                 // diagnostics, and 2) segment warming for
                 // merged segments is actually happening:
                 foreach (AtomicReaderContext sub in s.IndexReader.Leaves)
                 {
                     SegmentReader segReader = (SegmentReader)sub.Reader;
                     IDictionary <string, string> diagnostics = segReader.SegmentInfo.Info.Diagnostics;
                     assertNotNull(diagnostics);
                     string source;
                     diagnostics.TryGetValue("source", out source);
                     assertNotNull(source);
                     if (source.Equals("merge", StringComparison.Ordinal))
                     {
                         assertTrue("sub reader " + sub + " wasn't warmed: warmed=" + outerInstance.warmed + " diagnostics=" + diagnostics + " si=" + segReader.SegmentInfo,
                                    !outerInstance.m_assertMergedSegmentsWarmed || outerInstance.warmed.ContainsKey(segReader.core));
                     }
                 }
                 if (s.IndexReader.NumDocs > 0)
                 {
                     outerInstance.SmokeTestSearcher(s);
                     Fields fields = MultiFields.GetFields(s.IndexReader);
                     if (fields == null)
                     {
                         continue;
                     }
                     Terms terms = fields.GetTerms("body");
                     if (terms == null)
                     {
                         continue;
                     }
                     TermsEnum termsEnum     = terms.GetIterator(null);
                     int       seenTermCount = 0;
                     int       shift;
                     int       trigger;
                     if (totTermCount.Get() < 30)
                     {
                         shift   = 0;
                         trigger = 1;
                     }
                     else
                     {
                         trigger = totTermCount.Get() / 30;
                         shift   = Random.Next(trigger);
                     }
                     while (Environment.TickCount < stopTimeMS)
                     {
                         BytesRef term = termsEnum.Next();
                         if (term == null)
                         {
                             totTermCount.Set(seenTermCount);
                             break;
                         }
                         seenTermCount++;
                         // search 30 terms
                         if ((seenTermCount + shift) % trigger == 0)
                         {
                             //if (VERBOSE) {
                             //System.out.println(Thread.currentThread().getName() + " now search body:" + term.Utf8ToString());
                             //}
                             totHits.AddAndGet(outerInstance.RunQuery(s, new TermQuery(new Term("body", term))));
                         }
                     }
                     //if (VERBOSE) {
                     //System.out.println(Thread.currentThread().getName() + ": search done");
                     //}
                 }
             }
             finally
             {
                 outerInstance.ReleaseSearcher(s);
             }
         }
         catch (Exception t)
         {
             Console.WriteLine(Thread.CurrentThread.Name + ": hit exc");
             outerInstance.m_failed.Set(true);
             Console.WriteLine(t.ToString());
             throw new Exception(t.ToString(), t);
         }
     }
 }
Example #19
		/* FIXME if we want to support non-contiguous segment merges */
		private bool CommitMerge(MergePolicy.OneMerge merge, SegmentMerger merger, int mergedDocCount, SegmentReader mergedReader)
		{
			lock (this)
			{
				
				System.Diagnostics.Debug.Assert(TestPoint("startCommitMerge"));
				
				if (hitOOM)
				{
					throw new System.SystemException("this writer hit an OutOfMemoryError; cannot complete merge");
				}
				
				if (infoStream != null)
					Message("commitMerge: " + merge.SegString(directory) + " index=" + SegString());
				
				System.Diagnostics.Debug.Assert(merge.registerDone);
				
				// If merge was explicitly aborted, or, if rollback() or
				// rollbackTransaction() had been called since our merge
				// started (which results in an unqualified
				// deleter.refresh() call that will remove any index
				// file that current segments does not reference), we
				// abort this merge
				if (merge.IsAborted())
				{
					if (infoStream != null)
						Message("commitMerge: skipping merge " + merge.SegString(directory) + ": it was aborted");
					
					return false;
				}
				
				int start = EnsureContiguousMerge(merge);
				
				CommitMergedDeletes(merge, mergedReader);
				docWriter.RemapDeletes(segmentInfos, merger.GetDocMaps(), merger.GetDelCounts(), merge, mergedDocCount);

                // If the doc store we are using has been closed and
                // is in now compound format (but wasn't when we
                // started), then we will switch to the compound
                // format as well:
                SetMergeDocStoreIsCompoundFile(merge);
				
				merge.info.HasProx = merger.HasProx();
				
				segmentInfos.RemoveRange(start, start + merge.segments.Count - start);
				System.Diagnostics.Debug.Assert(!segmentInfos.Contains(merge.info));
				segmentInfos.Insert(start, merge.info);

                CloseMergeReaders(merge, false);
				
				// Must note the change to segmentInfos so any commits
				// in-flight don't lose it:
				Checkpoint();
				
				// If the merged segments had pending changes, clear
				// them so that they don't bother writing them to
				// disk, updating SegmentInfo, etc.:
				readerPool.Clear(merge.segments);

                if (merge.optimize)
                {
                    // cascade the optimize:
                    segmentsToOptimize.Add(merge.info);
                }
				return true;
			}
		}
Example #20
 internal ReadOnlyDirectoryReader(Directory directory, SegmentInfos infos, SegmentReader[] oldReaders, int[] oldStarts, System.Collections.Generic.IDictionary<string, byte[]> oldNormsCache, bool doClone, int termInfosIndexDivisor)
     : base(directory, infos, oldReaders, oldStarts, oldNormsCache, true, doClone, termInfosIndexDivisor)
 {
 }
Example #21
		    /// <summary> Release the segment reader (i.e. decRef it and close if there
		    /// are no more references).
		    /// </summary>
		    /// <param name="sr">
		    /// </param>
		    /// <param name="drop"></param>
		    /// <throws>  IOException </throws>
		    public virtual void  Release(SegmentReader sr, bool drop)
			{
				lock (this)
				{
					
					bool pooled = readerMap.ContainsKey(sr.SegmentInfo);

                    System.Diagnostics.Debug.Assert(!pooled || readerMap[sr.SegmentInfo] == sr);

                    // Drop caller's ref; for an external reader (not
                    // pooled), this decRef will close it
					sr.DecRef();
					
					if (pooled && (drop || (!Enclosing_Instance.poolReaders && sr.RefCount == 1)))
					{

                        // We invoke deleter.checkpoint below, so we must be
                        // sync'd on IW if there are changes:
						
                        // TODO: Java 1.5 has this, .NET can't.
						// System.Diagnostics.Debug.Assert(!sr.hasChanges || Thread.holdsLock(enclosingInstance));

                        // Discard (don't save) changes when we are dropping
                        // the reader; this is used only on the sub-readers
                        // after a successful merge.
                        sr.hasChanges &= !drop;

                        bool hasChanges = sr.hasChanges;
						
						// Drop our ref -- this will commit any pending
						// changes to the dir
                        sr.Close();

                        // We are the last ref to this reader; since we're
                        // not pooling readers, we release it:
                        readerMap.Remove(sr.SegmentInfo);

                        if (hasChanges)
                        {
                            // Must checkpoint w/ deleter, because this
                            // segment reader will have created new _X_N.del
                            // file.
                            enclosingInstance.deleter.Checkpoint(enclosingInstance.segmentInfos, false);
                        }
					}
				}
			}
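A hedged usage sketch of the acquire/release pairing this method supports; it assumes code running inside IndexWriter where readerPool, info, and termInfosIndexDivisor are in scope (as in Example #12), and the exact call site is an illustration, not code from the library:

				SegmentReader sr = readerPool.GetReadOnlyClone(info, true, termInfosIndexDivisor);
				try
				{
					// ... use sr: postings, norms, stored fields ...
				}
				finally
				{
					readerPool.Release(sr);   // decRef; closes the reader once no references remain
				}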
Example #22
 public override void Run()
 {
     if (Verbose)
     {
         Console.WriteLine(Thread.CurrentThread.Name + ": launch search thread");
     }
     while (J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond < stopTimeMS) // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results
     {
         try
         {
             IndexSearcher s = outerInstance.GetCurrentSearcher();
             try
             {
                 // Verify 1) IW is correctly setting
                 // diagnostics, and 2) segment warming for
                 // merged segments is actually happening:
                 foreach (AtomicReaderContext sub in s.IndexReader.Leaves)
                 {
                     SegmentReader segReader = (SegmentReader)sub.Reader;
                     IDictionary <string, string> diagnostics = segReader.SegmentInfo.Info.Diagnostics;
                     assertNotNull(diagnostics);
                     diagnostics.TryGetValue("source", out string source);
                     assertNotNull(source);
                     if (source.Equals("merge", StringComparison.Ordinal))
                     {
                         assertTrue("sub reader " + sub + " wasn't warmed: warmed=" + outerInstance.warmed + " diagnostics=" + diagnostics + " si=" + segReader.SegmentInfo,
                                    // LUCENENET: ConditionalWeakTable doesn't have ContainsKey, so we normalize to TryGetValue
                                    !outerInstance.m_assertMergedSegmentsWarmed || outerInstance.warmed.TryGetValue(segReader.core, out BooleanRef _));
                     }
                 }
                 if (s.IndexReader.NumDocs > 0)
                 {
                     outerInstance.SmokeTestSearcher(s);
                     Fields fields = MultiFields.GetFields(s.IndexReader);
                     if (fields == null)
                     {
                         continue;
                     }
                     Terms terms = fields.GetTerms("body");
                     if (terms == null)
                     {
                         continue;
                     }
                     TermsEnum termsEnum     = terms.GetEnumerator();
                     int       seenTermCount = 0;
                     int       shift;
                     int       trigger;
                     if (totTermCount < 30)
                     {
                         shift   = 0;
                         trigger = 1;
                     }
                     else
                     {
                         trigger = totTermCount / 30;
                         shift   = Random.Next(trigger);
                     }
                     while (J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond < stopTimeMS) // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results
                     {
                         if (!termsEnum.MoveNext())
                         {
                             totTermCount.Value = seenTermCount;
                             break;
                         }
                         seenTermCount++;
                         // search 30 terms
                         if ((seenTermCount + shift) % trigger == 0)
                         {
                             //if (VERBOSE) {
                             //System.out.println(Thread.currentThread().getName() + " now search body:" + term.Utf8ToString());
                             //}
                             totHits.AddAndGet(outerInstance.RunQuery(s, new TermQuery(new Term("body", termsEnum.Term))));
                         }
                     }
                     //if (VERBOSE) {
                     //System.out.println(Thread.currentThread().getName() + ": search done");
                     //}
                 }
             }
             finally
             {
                 outerInstance.ReleaseSearcher(s);
             }
         }
         catch (Exception t) when(t.IsThrowable())
         {
             Console.WriteLine(Thread.CurrentThread.Name + ": hit exc");
             outerInstance.m_failed.Value = (true);
             Console.WriteLine(t.ToString());
             throw RuntimeException.Create(t);
         }
     }
 }