Ejemplo n.º 1
0
        private void  CopyVectorsNoDeletions(TermVectorsWriter termVectorsWriter, TermVectorsReader matchingVectorsReader, IndexReader reader)
        {
            int maxDoc = reader.MaxDoc;

            if (matchingVectorsReader != null)
            {
                // We can bulk-copy because the fieldInfos are "congruent"
                int docCount = 0;
                while (docCount < maxDoc)
                {
                    int len = System.Math.Min(MAX_RAW_MERGE_DOCS, maxDoc - docCount);
                    matchingVectorsReader.RawDocs(rawDocLengths, rawDocLengths2, docCount, len);
                    termVectorsWriter.AddRawDocuments(matchingVectorsReader, rawDocLengths, rawDocLengths2, len);
                    docCount += len;
                    checkAbort.Work(300 * len);
                }
            }
            else
            {
                for (int docNum = 0; docNum < maxDoc; docNum++)
                {
                    // NOTE: it's very important to first assign to vectors then pass it to
                    // termVectorsWriter.addAllDocVectors; see LUCENE-1282
                    ITermFreqVector[] vectors = reader.GetTermFreqVectors(docNum);
                    termVectorsWriter.AddAllDocVectors(vectors);
                    checkAbort.Work(300);
                }
            }
        }
Ejemplo n.º 2
0
        private void  CopyVectorsWithDeletions(TermVectorsWriter termVectorsWriter, TermVectorsReader matchingVectorsReader, IndexReader reader)
        {
            int maxDoc = reader.MaxDoc;

            if (matchingVectorsReader != null)
            {
                // We can bulk-copy because the fieldInfos are "congruent"
                for (int docNum = 0; docNum < maxDoc;)
                {
                    if (reader.IsDeleted(docNum))
                    {
                        // skip deleted docs
                        ++docNum;
                        continue;
                    }
                    // We can optimize this case (doing a bulk byte copy) since the field
                    // numbers are identical
                    int start = docNum, numDocs = 0;
                    do
                    {
                        docNum++;
                        numDocs++;
                        if (docNum >= maxDoc)
                        {
                            break;
                        }
                        if (reader.IsDeleted(docNum))
                        {
                            docNum++;
                            break;
                        }
                    }while (numDocs < MAX_RAW_MERGE_DOCS);

                    matchingVectorsReader.RawDocs(rawDocLengths, rawDocLengths2, start, numDocs);
                    termVectorsWriter.AddRawDocuments(matchingVectorsReader, rawDocLengths, rawDocLengths2, numDocs);
                    checkAbort.Work(300 * numDocs);
                }
            }
            else
            {
                for (int docNum = 0; docNum < maxDoc; docNum++)
                {
                    if (reader.IsDeleted(docNum))
                    {
                        // skip deleted docs
                        continue;
                    }

                    // NOTE: it's very important to first assign to vectors then pass it to
                    // termVectorsWriter.addAllDocVectors; see LUCENE-1282
                    ITermFreqVector[] vectors = reader.GetTermFreqVectors(docNum);
                    termVectorsWriter.AddAllDocVectors(vectors);
                    checkAbort.Work(300);
                }
            }
        }
Ejemplo n.º 3
0
        /// <summary> Merge the TermVectors from each of the segments into the new one.</summary>
        /// <throws>  IOException </throws>
        private void  MergeVectors()
        {
            TermVectorsWriter termVectorsWriter = new TermVectorsWriter(directory, segment, fieldInfos);

            try
            {
                int idx = 0;
                foreach (IndexReader reader in readers)
                {
                    SegmentReader     matchingSegmentReader = matchingSegmentReaders[idx++];
                    TermVectorsReader matchingVectorsReader = null;
                    if (matchingSegmentReader != null)
                    {
                        TermVectorsReader vectorsReader = matchingSegmentReader.GetTermVectorsReaderOrig();

                        // If the TV* files are an older format then they cannot read raw docs:
                        if (vectorsReader != null && vectorsReader.CanReadRawDocs())
                        {
                            matchingVectorsReader = vectorsReader;
                        }
                    }
                    if (reader.HasDeletions)
                    {
                        CopyVectorsWithDeletions(termVectorsWriter, matchingVectorsReader, reader);
                    }
                    else
                    {
                        CopyVectorsNoDeletions(termVectorsWriter, matchingVectorsReader, reader);
                    }
                }
            }
            finally
            {
                termVectorsWriter.Dispose();
            }

            System.String fileName = segment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION;
            long          tvxSize  = directory.FileLength(fileName);

            if (4 + ((long)mergedDocs) * 16 != tvxSize)
            {
                // This is most likely a bug in Sun JRE 1.6.0_04/_05;
                // we detect that the bug has struck, here, and
                // throw an exception to prevent the corruption from
                // entering the index.  See LUCENE-1282 for
                // details.
                throw new System.SystemException("mergeVectors produced an invalid result: mergedDocs is " + mergedDocs + " but tvx size is " + tvxSize + " file=" + fileName + " file exists?=" + directory.FileExists(fileName) + "; now aborting this merge to prevent index corruption");
            }
        }
Ejemplo n.º 4
0
		private void  CopyVectorsNoDeletions(TermVectorsWriter termVectorsWriter, TermVectorsReader matchingVectorsReader, IndexReader reader)
		{
			int maxDoc = reader.MaxDoc;
			if (matchingVectorsReader != null)
			{
				// We can bulk-copy because the fieldInfos are "congruent"
				int docCount = 0;
				while (docCount < maxDoc)
				{
					int len = System.Math.Min(MAX_RAW_MERGE_DOCS, maxDoc - docCount);
					matchingVectorsReader.RawDocs(rawDocLengths, rawDocLengths2, docCount, len);
					termVectorsWriter.AddRawDocuments(matchingVectorsReader, rawDocLengths, rawDocLengths2, len);
					docCount += len;
					checkAbort.Work(300 * len);
				}
			}
			else
			{
				for (int docNum = 0; docNum < maxDoc; docNum++)
				{
					// NOTE: it's very important to first assign to vectors then pass it to
					// termVectorsWriter.addAllDocVectors; see LUCENE-1282
					ITermFreqVector[] vectors = reader.GetTermFreqVectors(docNum);
					termVectorsWriter.AddAllDocVectors(vectors);
					checkAbort.Work(300);
				}
			}
		}
Ejemplo n.º 5
0
		private void  CopyVectorsWithDeletions(TermVectorsWriter termVectorsWriter, TermVectorsReader matchingVectorsReader, IndexReader reader)
		{
			int maxDoc = reader.MaxDoc;
			if (matchingVectorsReader != null)
			{
				// We can bulk-copy because the fieldInfos are "congruent"
				for (int docNum = 0; docNum < maxDoc; )
				{
					if (reader.IsDeleted(docNum))
					{
						// skip deleted docs
						++docNum;
						continue;
					}
					// We can optimize this case (doing a bulk byte copy) since the field 
					// numbers are identical
					int start = docNum, numDocs = 0;
					do 
					{
						docNum++;
						numDocs++;
						if (docNum >= maxDoc)
							break;
						if (reader.IsDeleted(docNum))
						{
							docNum++;
							break;
						}
					}
					while (numDocs < MAX_RAW_MERGE_DOCS);
					
					matchingVectorsReader.RawDocs(rawDocLengths, rawDocLengths2, start, numDocs);
					termVectorsWriter.AddRawDocuments(matchingVectorsReader, rawDocLengths, rawDocLengths2, numDocs);
					checkAbort.Work(300 * numDocs);
				}
			}
			else
			{
				for (int docNum = 0; docNum < maxDoc; docNum++)
				{
					if (reader.IsDeleted(docNum))
					{
						// skip deleted docs
						continue;
					}
					
					// NOTE: it's very important to first assign to vectors then pass it to
					// termVectorsWriter.addAllDocVectors; see LUCENE-1282
					ITermFreqVector[] vectors = reader.GetTermFreqVectors(docNum);
					termVectorsWriter.AddAllDocVectors(vectors);
					checkAbort.Work(300);
				}
			}
		}
Ejemplo n.º 6
0
		/// <summary> Merge the TermVectors from each of the segments into the new one.</summary>
		/// <throws>  IOException </throws>
		private void  MergeVectors()
		{
			TermVectorsWriter termVectorsWriter = new TermVectorsWriter(directory, segment, fieldInfos);
			
			try
			{
				int idx = 0;
				foreach(IndexReader reader in readers)
				{
					SegmentReader matchingSegmentReader = matchingSegmentReaders[idx++];
					TermVectorsReader matchingVectorsReader = null;
					if (matchingSegmentReader != null)
					{
						TermVectorsReader vectorsReader = matchingSegmentReader.GetTermVectorsReaderOrig();
						
						// If the TV* files are an older format then they cannot read raw docs:
						if (vectorsReader != null && vectorsReader.CanReadRawDocs())
						{
							matchingVectorsReader = vectorsReader;
						}
					}
					if (reader.HasDeletions)
					{
						CopyVectorsWithDeletions(termVectorsWriter, matchingVectorsReader, reader);
					}
					else
					{
						CopyVectorsNoDeletions(termVectorsWriter, matchingVectorsReader, reader);
					}
				}
			}
			finally
			{
				termVectorsWriter.Dispose();
			}
			
			System.String fileName = segment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION;
			long tvxSize = directory.FileLength(fileName);
			
			if (4 + ((long) mergedDocs) * 16 != tvxSize)
			// This is most likely a bug in Sun JRE 1.6.0_04/_05;
			// we detect that the bug has struck, here, and
			// throw an exception to prevent the corruption from
			// entering the index.  See LUCENE-1282 for
			// details.
				throw new System.SystemException("mergeVectors produced an invalid result: mergedDocs is " + mergedDocs + " but tvx size is " + tvxSize + " file=" + fileName + " file exists?=" + directory.FileExists(fileName) + "; now aborting this merge to prevent index corruption");
		}