IsDeleted() public abstract method

Returns true if document n has been deleted.

public abstract bool IsDeleted(int n)

Parameter: n (int), the document number to check
Returns: bool
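A minimal usage sketch (not taken from the collected examples; it assumes a Lucene.NET 3.x style API and an existing index, and the directory path, class, and method names below are placeholders): callers typically loop up to MaxDoc, which counts deleted slots, and skip every document number for which IsDeleted returns true. Depending on the Lucene.NET version, MaxDoc and NumDocs appear either as properties or as methods, as the examples below show.

        // Minimal sketch, assuming Lucene.NET 3.x; "indexDir", LiveDocumentLister and
        // ListLiveDocuments are placeholder names, not part of the library.
        using Lucene.Net.Documents;
        using Lucene.Net.Index;
        using Lucene.Net.Store;

        public static class LiveDocumentLister
        {
            public static void ListLiveDocuments()
            {
                Directory dir = FSDirectory.Open(new System.IO.DirectoryInfo("indexDir"));
                IndexReader reader = IndexReader.Open(dir, true); // open a read-only reader

                try
                {
                    for (int n = 0; n < reader.MaxDoc; n++) // MaxDoc includes deleted slots
                    {
                        if (reader.IsDeleted(n))
                        {
                            continue; // skip documents that have been marked as deleted
                        }
                        Document doc = reader.Document(n); // safe: document n is live
                        // ... use the live document here ...
                    }
                }
                finally
                {
                    reader.Close();
                    dir.Close();
                }
            }
        }

The same skip-deleted pattern recurs throughout the merge, cache-filling, and test examples that follow.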
Example #1
        private int CopyFieldsWithDeletions(FieldsWriter fieldsWriter, IndexReader reader, FieldsReader matchingFieldsReader)
        {
            int docCount = 0;
            int maxDoc   = reader.MaxDoc;

            if (matchingFieldsReader != null)
            {
                // We can bulk-copy because the fieldInfos are "congruent"
                for (int j = 0; j < maxDoc;)
                {
                    if (reader.IsDeleted(j))
                    {
                        // skip deleted docs
                        ++j;
                        continue;
                    }
                    // We can optimize this case (doing a bulk byte copy) since the field
                    // numbers are identical
                    int start = j, numDocs = 0;
                    do
                    {
                        j++;
                        numDocs++;
                        if (j >= maxDoc)
                        {
                            break;
                        }
                        if (reader.IsDeleted(j))
                        {
                            j++;
                            break;
                        }
                    }while (numDocs < MAX_RAW_MERGE_DOCS);

                    IndexInput stream = matchingFieldsReader.RawDocs(rawDocLengths, start, numDocs);
                    fieldsWriter.AddRawDocuments(stream, rawDocLengths, numDocs);
                    docCount += numDocs;
                    checkAbort.Work(300 * numDocs);
                }
            }
            else
            {
                for (int j = 0; j < maxDoc; j++)
                {
                    if (reader.IsDeleted(j))
                    {
                        // skip deleted docs
                        continue;
                    }
                    // NOTE: it's very important to first assign to doc then pass it to
                    // termVectorsWriter.addAllDocVectors; see LUCENE-1282
                    Document doc = reader.Document(j);
                    fieldsWriter.AddDocument(doc);
                    docCount++;
                    checkAbort.Work(300);
                }
            }
            return(docCount);
        }
Example #2
        private void  CopyVectorsWithDeletions(TermVectorsWriter termVectorsWriter, TermVectorsReader matchingVectorsReader, IndexReader reader)
        {
            int maxDoc = reader.MaxDoc;

            if (matchingVectorsReader != null)
            {
                // We can bulk-copy because the fieldInfos are "congruent"
                for (int docNum = 0; docNum < maxDoc;)
                {
                    if (reader.IsDeleted(docNum))
                    {
                        // skip deleted docs
                        ++docNum;
                        continue;
                    }
                    // We can optimize this case (doing a bulk byte copy) since the field
                    // numbers are identical
                    int start = docNum, numDocs = 0;
                    do
                    {
                        docNum++;
                        numDocs++;
                        if (docNum >= maxDoc)
                        {
                            break;
                        }
                        if (reader.IsDeleted(docNum))
                        {
                            docNum++;
                            break;
                        }
                    }while (numDocs < MAX_RAW_MERGE_DOCS);

                    matchingVectorsReader.RawDocs(rawDocLengths, rawDocLengths2, start, numDocs);
                    termVectorsWriter.AddRawDocuments(matchingVectorsReader, rawDocLengths, rawDocLengths2, numDocs);
                    checkAbort.Work(300 * numDocs);
                }
            }
            else
            {
                for (int docNum = 0; docNum < maxDoc; docNum++)
                {
                    if (reader.IsDeleted(docNum))
                    {
                        // skip deleted docs
                        continue;
                    }

                    // NOTE: it's very important to first assign to vectors then pass it to
                    // termVectorsWriter.addAllDocVectors; see LUCENE-1282
                    ITermFreqVector[] vectors = reader.GetTermFreqVectors(docNum);
                    termVectorsWriter.AddAllDocVectors(vectors);
                    checkAbort.Work(300);
                }
            }
        }
Example #3
 private void  MergeNorms()
 {
     for (int i = 0; i < fieldInfos.Size(); i++)
     {
         FieldInfo fi = fieldInfos.FieldInfo(i);
         if (fi.isIndexed && !fi.omitNorms)
         {
             IndexOutput output = directory.CreateOutput(segment + ".f" + i);
             try
             {
                 for (int j = 0; j < readers.Count; j++)
                 {
                     IndexReader reader = (IndexReader)readers[j];
                     int         maxDoc = reader.MaxDoc();
                     byte[]      input  = new byte[maxDoc];
                     reader.Norms(fi.name, input, 0);
                     for (int k = 0; k < maxDoc; k++)
                     {
                         if (!reader.IsDeleted(k))
                         {
                             output.WriteByte(input[k]);
                         }
                     }
                 }
             }
             finally
             {
                 output.Close();
             }
         }
     }
 }
Example #4
        /// <summary> 1. Get a norm from the original reader 2. Clone the original reader 3.
        /// Delete a document and set the norm of the cloned reader 4. Verify the norms
        /// are not the same on each reader 5. Verify the doc deleted is only in the
        /// cloned reader 6. Try to delete a document in the original reader, an
        /// exception should be thrown
        ///
        /// </summary>
        /// <param name="r1">IndexReader to perform tests on
        /// </param>
        /// <throws>  Exception </throws>
        private void  PerformDefaultTests(IndexReader r1)
        {
            float norm1 = Similarity.DecodeNorm(r1.Norms("field1")[4]);

            IndexReader pr1Clone = (IndexReader)r1.Clone();

            pr1Clone.DeleteDocument(10);
            pr1Clone.SetNorm(4, "field1", 0.5f);
            Assert.IsTrue(Similarity.DecodeNorm(r1.Norms("field1")[4]) == norm1);
            Assert.IsTrue(Similarity.DecodeNorm(pr1Clone.Norms("field1")[4]) != norm1);

            Assert.IsTrue(!r1.IsDeleted(10));
            Assert.IsTrue(pr1Clone.IsDeleted(10));

            // try to update the original reader, which should throw an exception
            try
            {
                r1.DeleteDocument(11);
                Assert.Fail("Tried to delete doc 11 and an exception should have been thrown");
            }
            catch (System.Exception exception)
            {
                // expected
            }
            pr1Clone.Close();
        }
Example #5
        private void  MergeNorms()
        {
            byte[]      normBuffer = null;
            IndexOutput output     = null;

            try
            {
                for (int i = 0; i < fieldInfos.Size(); i++)
                {
                    FieldInfo fi = fieldInfos.FieldInfo(i);
                    if (fi.isIndexed && !fi.omitNorms)
                    {
                        if (output == null)
                        {
                            output = directory.CreateOutput(segment + "." + IndexFileNames.NORMS_EXTENSION);
                            output.WriteBytes(NORMS_HEADER, NORMS_HEADER.Length);
                        }
                        for (int j = 0; j < readers.Count; j++)
                        {
                            IndexReader reader = (IndexReader)readers[j];
                            int         maxDoc = reader.MaxDoc();
                            if (normBuffer == null || normBuffer.Length < maxDoc)
                            {
                                // the buffer is too small for the current segment
                                normBuffer = new byte[maxDoc];
                            }
                            reader.Norms(fi.name, normBuffer, 0);
                            if (!reader.HasDeletions())
                            {
                                //optimized case for segments without deleted docs
                                output.WriteBytes(normBuffer, maxDoc);
                            }
                            else
                            {
                                // this segment has deleted docs, so we have to
                                // check for every doc if it is deleted or not
                                for (int k = 0; k < maxDoc; k++)
                                {
                                    if (!reader.IsDeleted(k))
                                    {
                                        output.WriteByte(normBuffer[k]);
                                    }
                                }
                            }
                            if (checkAbort != null)
                            {
                                checkAbort.Work(maxDoc);
                            }
                        }
                    }
                }
            }
            finally
            {
                if (output != null)
                {
                    output.Close();
                }
            }
        }
Example #6
        /// <summary> Merge the TermVectors from each of the segments into the new one.</summary>
        /// <throws>  IOException </throws>
        private void  MergeVectors()
        {
            TermVectorsWriter termVectorsWriter = new TermVectorsWriter(directory, segment, fieldInfos);

            try
            {
                for (int r = 0; r < readers.Count; r++)
                {
                    IndexReader reader = (IndexReader)readers[r];
                    int         maxDoc = reader.MaxDoc();
                    for (int docNum = 0; docNum < maxDoc; docNum++)
                    {
                        // skip deleted docs
                        if (reader.IsDeleted(docNum))
                        {
                            continue;
                        }
                        termVectorsWriter.AddAllDocVectors(reader.GetTermFreqVectors(docNum));
                        if (checkAbort != null)
                        {
                            checkAbort.Work(300);
                        }
                    }
                }
            }
            finally
            {
                termVectorsWriter.Close();
            }
        }
Example #7
 // maps around deleted docs
 internal int[] GetDocMap()
 {
     if (docMap == null)
     {
         delCount = 0;
         // build array which maps document numbers around deletions
         if (reader.HasDeletions)
         {
             int maxDoc = reader.MaxDoc;
             docMap = new int[maxDoc];
             int j = 0;
             for (int i = 0; i < maxDoc; i++)
             {
                 if (reader.IsDeleted(i))
                 {
                     delCount++;
                     docMap[i] = -1;
                 }
                 else
                 {
                     docMap[i] = j++;
                 }
             }
         }
     }
     return(docMap);
 }
Example #8
        static void MakeLatestVersionLookupPerReader(IDictionary<string, Tuple<NuGetVersion, string, int>> lookup, IndexReader reader, string readerName, bool includePrerelease, bool includeUnlisted)
        {
            for (int n = 0; n < reader.MaxDoc; n++)
            {
                if (reader.IsDeleted(n))
                {
                    continue;
                }

                Document document = reader.Document(n);

                NuGetVersion version = GetVersion(document);

                if (version == null)
                {
                    continue;
                }

                bool isListed = GetListed(document);

                if (isListed || includeUnlisted)
                {
                    if (!version.IsPrerelease || includePrerelease)
                    {
                        string id = GetId(document);

                        if (id == null)
                        {
                            continue;
                        }

                        Tuple<NuGetVersion, string, int> existingVersion;
                        if (lookup.TryGetValue(id, out existingVersion))
                        {
                            if (version > existingVersion.Item1)
                            {
                                lookup[id] = Tuple.Create(version, readerName, n);
                            }
                        }
                        else
                        {
                            lookup.Add(id, Tuple.Create(version, readerName, n));
                        }
                    }
                }
            }
        }
Example #9
        protected override CustomScoreProvider GetCustomScoreProvider(IndexReader reader)
        {
            int maxDoc = reader.MaxDoc;
            long[] daysAgo = new long[maxDoc];
            long[] publishDate = FieldCache_Fields.DEFAULT.GetLongs(reader, SearchDocument.TimeStampFieldName);
            long currentDate = DateTime.UtcNow.ToFileTime();
            long timeSpanFromDaysTicks = TimeSpan.FromDays(36500).Ticks;

            for (int i = 0; i < maxDoc; i++)
            {
                if (!reader.IsDeleted(i))
                    daysAgo[i] = currentDate - publishDate[i];
            }
            var recencyData = new RecencyOptions(daysAgo, 5, timeSpanFromDaysTicks);

            return new RecencyScoreProvider(reader, recencyData);
        }
Example #10
        /// <summary> </summary>
        /// <returns> The number of documents in all of the readers
        /// </returns>
        /// <throws>  IOException </throws>
        private int MergeFields()
        {
            fieldInfos = new FieldInfos();             // merge field names
            int docCount = 0;

            for (int i = 0; i < readers.Count; i++)
            {
                IndexReader reader = (IndexReader)readers[i];
                AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET), true, true, true);
                AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION), true, true, false);
                AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET), true, false, true);
                AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR), true, false, false);
                AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.INDEXED), false, false, false);
                fieldInfos.Add(reader.GetFieldNames(IndexReader.FieldOption.UNINDEXED), false);
            }
            fieldInfos.Write(directory, segment + ".fnm");

            FieldsWriter fieldsWriter = new FieldsWriter(directory, segment, fieldInfos);

            // for merging we don't want to compress/uncompress the data, so to tell the FieldsReader that we're
            // in  merge mode, we use this FieldSelector
            FieldSelector fieldSelectorMerge = new AnonymousClassFieldSelector(this);

            try
            {
                for (int i = 0; i < readers.Count; i++)
                {
                    IndexReader reader = (IndexReader)readers[i];
                    int         maxDoc = reader.MaxDoc();
                    for (int j = 0; j < maxDoc; j++)
                    {
                        if (!reader.IsDeleted(j))
                        {
                            // skip deleted docs
                            fieldsWriter.AddDocument(reader.Document(j, fieldSelectorMerge));
                            docCount++;
                        }
                    }
                }
            }
            finally
            {
                fieldsWriter.Close();
            }
            return(docCount);
        }
Example #11
        void ProcessReader(IndexReader indexReader, string readerName, ref int perIndexDocumentNumber)
        {
            for (int perReaderDocumentNumber = 0; perReaderDocumentNumber < indexReader.MaxDoc; perReaderDocumentNumber++)
            {
                if (indexReader.IsDeleted(perReaderDocumentNumber))
                {
                    ProcessDocument(indexReader, readerName, perReaderDocumentNumber, perIndexDocumentNumber, null, isDelete: true);
                }
                else
                {
                    Document document = indexReader.Document(perReaderDocumentNumber);
                    ProcessDocument(indexReader, readerName, perReaderDocumentNumber, perIndexDocumentNumber, document, isDelete: false);
                }

                perIndexDocumentNumber++;
            }
        }
Example #12
        public PackageVersions(IndexReader reader)
        {
            _reader = reader;
            _registrations = new Dictionary<string, List<NuGetVersion>>();

            for (int i = 0; i < reader.MaxDoc; i++)
            {
                if (reader.IsDeleted(i))
                {
                    continue;
                }

                Document document = reader[i];

                NuGetVersion currentVersion = GetVersion(document);

                if (currentVersion == null)
                {
                    continue;
                }

                string id = GetId(document);

                if (id == null)
                {
                    continue;
                }

                List<NuGetVersion> versions;
                if (!_registrations.TryGetValue(id, out versions))
                {
                    versions = new List<NuGetVersion>();
                    _registrations.Add(id, versions);
                }

                versions.Add(currentVersion);
            }

            foreach (List<NuGetVersion> values in _registrations.Values)
            {
                values.Sort();
            }
        }
Example #13
        /// <summary> 1. Get a norm from the original reader 2. Clone the original reader 3.
        /// Delete a document and set the norm of the cloned reader 4. Verify the norms
        /// are not the same on each reader 5. Verify the doc deleted is only in the
        /// cloned reader 6. Try to delete a document in the original reader, an
        /// exception should be thrown
        ///
        /// </summary>
        /// <param name="r1">IndexReader to perform tests on
        /// </param>
        /// <throws>  Exception </throws>
        private void  PerformDefaultTests(IndexReader r1)
        {
            float norm1 = Similarity.DecodeNorm(r1.Norms("field1")[4]);

            IndexReader pr1Clone = (IndexReader)r1.Clone();

            pr1Clone.DeleteDocument(10);
            pr1Clone.SetNorm(4, "field1", 0.5f);
            Assert.IsTrue(Similarity.DecodeNorm(r1.Norms("field1")[4]) == norm1);
            Assert.IsTrue(Similarity.DecodeNorm(pr1Clone.Norms("field1")[4]) != norm1);

            Assert.IsTrue(!r1.IsDeleted(10));
            Assert.IsTrue(pr1Clone.IsDeleted(10));

            // try to update the original reader, which should throw an exception
            Assert.Throws <LockObtainFailedException>(() => r1.DeleteDocument(11),
                                                      "Tried to delete doc 11 and an exception should have been thrown");
            pr1Clone.Close();
        }
Example #14
        /// <summary> </summary>
        /// <returns> The number of documents in all of the readers
        /// </returns>
        /// <throws>  IOException </throws>
        private int MergeFields()
        {
            fieldInfos = new FieldInfos();             // merge field names
            int docCount = 0;

            for (int i = 0; i < readers.Count; i++)
            {
                IndexReader reader = (IndexReader)readers[i];
                AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET), true, true, true);
                AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION), true, true, false);
                AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET), true, false, true);
                AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR), true, false, false);
                AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.INDEXED), false, false, false);
                fieldInfos.Add(reader.GetFieldNames(IndexReader.FieldOption.UNINDEXED), false);
            }
            fieldInfos.Write(directory, segment + ".fnm");

            FieldsWriter fieldsWriter = new FieldsWriter(directory, segment, fieldInfos);

            try
            {
                for (int i = 0; i < readers.Count; i++)
                {
                    IndexReader reader = (IndexReader)readers[i];
                    int         maxDoc = reader.MaxDoc();
                    for (int j = 0; j < maxDoc; j++)
                    {
                        if (!reader.IsDeleted(j))
                        {
                            // skip deleted docs
                            fieldsWriter.AddDocument(reader.Document(j));
                            docCount++;
                        }
                    }
                }
            }
            finally
            {
                fieldsWriter.Close();
            }
            return(docCount);
        }
Example #15
        public virtual void  TestCloneWithDeletes()
        {
            Directory dir1 = new MockRAMDirectory();

            TestIndexReaderReopen.CreateIndex(dir1, false);
            IndexReader origReader = IndexReader.Open(dir1, false);

            origReader.DeleteDocument(1);

            IndexReader clonedReader = (IndexReader)origReader.Clone();

            origReader.Close();
            clonedReader.Close();

            IndexReader r = IndexReader.Open(dir1, false);

            Assert.IsTrue(r.IsDeleted(1));
            r.Close();
            dir1.Close();
        }
Example #16
        private void  CheckExpecteds(System.Collections.BitArray expecteds)
        {
            IndexReader r = IndexReader.Open(dir);

            //Perhaps not the most efficient approach but meets our needs here.
            for (int i = 0; i < r.MaxDoc(); i++)
            {
                if (!r.IsDeleted(i))
                {
                    System.String sval = r.Document(i).Get(FIELD_RECORD_ID);
                    if (sval != null)
                    {
                        int val = System.Int32.Parse(sval);
                        Assert.IsTrue(expecteds.Get(val), "Did not expect document #" + val);
                        expecteds.Set(val, false);
                    }
                }
            }
            r.Close();
            Assert.AreEqual(0, SupportClass.BitSetSupport.Cardinality(expecteds), "Should have 0 docs remaining ");
        }
Example #17
        public override IEnumerable<Row> Execute(IEnumerable<Row> rows)
        {
            if (_indexDirectory == null)
                yield break;

            try {
                _reader = IndexReader.Open(_indexDirectory, true);
            }
            catch (Exception) {
                Warn("Failed to open lucene index in {0}.", _indexDirectory.Directory.FullName);
                yield break;
            }

            var docCount = _reader.NumDocs();
            Info("Found {0} documents in lucene index.", docCount);

            for (var i = 0; i < docCount; i++) {

                if (_reader.IsDeleted(i))
                    continue;

                var doc = _reader.Document(i);
                var row = new Row();
                foreach (var field in doc.GetFields().Where(field => field.IsStored)) {
                    switch (field.Name) {
                        case "dropped":
                            row[field.Name] = Convert.ToBoolean(field.StringValue);
                            break;
                        default:
                            row[field.Name] = field.StringValue;
                            break;
                    }

                }
                yield return row;

            }
        }
Example #18
        public static IEnumerable<string> GetDistintTenantId(IndexReader reader)
        {
            HashSet<string> result = new HashSet<string>();

            for (int i = 0; i < reader.MaxDoc; i++)
            {
                if (reader.IsDeleted(i))
                {
                    continue;
                }

                Document document = reader[i];

                string tenantId = document.Get("TenantId");

                if (tenantId != null)
                {
                    result.Add(tenantId);
                }
            }

            return result;
        }
Example #19
        private static Dictionary<string, int[]> FillCache(IndexReader reader, int docBase, string field)
        {
            using (var termDocs = reader.TermDocs())
            {
                var items = new Dictionary<string, int[]>();
                var docsForTerm = new List<int>();

                using (var termEnum = reader.Terms(new Term(field)))
                {
                    do
                    {
                        if (termEnum.Term == null || field != termEnum.Term.Field)
                            break;

                        Term term = termEnum.Term;
                        if (LowPrecisionNumber(term.Field, term.Text))
                            continue;

                        var totalDocCountIncludedDeletes = termEnum.DocFreq();
                        termDocs.Seek(termEnum.Term);
                        while (termDocs.Next() && totalDocCountIncludedDeletes > 0)
                        {
                            var curDoc = termDocs.Doc;
                            totalDocCountIncludedDeletes -= 1;
                            if (reader.IsDeleted(curDoc))
                                continue;

                            docsForTerm.Add(curDoc + docBase);
                        }
                        docsForTerm.Sort();
                        items[term.Text] = docsForTerm.ToArray();
                        docsForTerm.Clear();
                    } while (termEnum.Next());
                }
                return items;
            }
        }
Example #20
 public override bool IsDeleted(int n)
 {
     // Don't call ensureOpen() here (it could affect performance)
     return(in_Renamed.IsDeleted(n));
 }
Example #21
		private int CopyFieldsWithDeletions(FieldSelector fieldSelectorMerge, FieldsWriter fieldsWriter, IndexReader reader, FieldsReader matchingFieldsReader)
		{
			int docCount = 0;
			int maxDoc = reader.MaxDoc();
			if (matchingFieldsReader != null)
			{
				// We can bulk-copy because the fieldInfos are "congruent"
				for (int j = 0; j < maxDoc; )
				{
					if (reader.IsDeleted(j))
					{
						// skip deleted docs
						++j;
						continue;
					}
					// We can optimize this case (doing a bulk byte copy) since the field 
					// numbers are identical
					int start = j, numDocs = 0;
					do 
					{
						j++;
						numDocs++;
						if (j >= maxDoc)
							break;
						if (reader.IsDeleted(j))
						{
							j++;
							break;
						}
					}
					while (numDocs < MAX_RAW_MERGE_DOCS);
					
					IndexInput stream = matchingFieldsReader.RawDocs(rawDocLengths, start, numDocs);
					fieldsWriter.AddRawDocuments(stream, rawDocLengths, numDocs);
					docCount += numDocs;
					checkAbort.Work(300 * numDocs);
				}
			}
			else
			{
				for (int j = 0; j < maxDoc; j++)
				{
					if (reader.IsDeleted(j))
					{
						// skip deleted docs
						continue;
					}
					// NOTE: it's very important to first assign to doc then pass it to
					// termVectorsWriter.addAllDocVectors; see LUCENE-1282
					Document doc = reader.Document(j, fieldSelectorMerge);
					fieldsWriter.AddDocument(doc);
					docCount++;
					checkAbort.Work(300);
				}
			}
			return docCount;
		}
Example #22
		private void  CopyVectorsWithDeletions(TermVectorsWriter termVectorsWriter, TermVectorsReader matchingVectorsReader, IndexReader reader)
		{
			int maxDoc = reader.MaxDoc();
			if (matchingVectorsReader != null)
			{
				// We can bulk-copy because the fieldInfos are "congruent"
				for (int docNum = 0; docNum < maxDoc; )
				{
					if (reader.IsDeleted(docNum))
					{
						// skip deleted docs
						++docNum;
						continue;
					}
					// We can optimize this case (doing a bulk byte copy) since the field 
					// numbers are identical
					int start = docNum, numDocs = 0;
					do 
					{
						docNum++;
						numDocs++;
						if (docNum >= maxDoc)
							break;
						if (reader.IsDeleted(docNum))
						{
							docNum++;
							break;
						}
					}
					while (numDocs < MAX_RAW_MERGE_DOCS);
					
					matchingVectorsReader.RawDocs(rawDocLengths, rawDocLengths2, start, numDocs);
					termVectorsWriter.AddRawDocuments(matchingVectorsReader, rawDocLengths, rawDocLengths2, numDocs);
					checkAbort.Work(300 * numDocs);
				}
			}
			else
			{
				for (int docNum = 0; docNum < maxDoc; docNum++)
				{
					if (reader.IsDeleted(docNum))
					{
						// skip deleted docs
						continue;
					}
					
					// NOTE: it's very important to first assign to vectors then pass it to
					// termVectorsWriter.addAllDocVectors; see LUCENE-1282
					TermFreqVector[] vectors = reader.GetTermFreqVectors(docNum);
					termVectorsWriter.AddAllDocVectors(vectors);
					checkAbort.Work(300);
				}
			}
		}
Example #23
        private void assertCompressedFields29(Directory dir, bool shouldStillBeCompressed)
        {
            int count             = 0;
            int TEXT_PLAIN_LENGTH = TEXT_TO_COMPRESS.Length * 2;
            // FieldSelectorResult.SIZE returns 2*number_of_chars for String fields:
            int BINARY_PLAIN_LENGTH = BINARY_TO_COMPRESS.Length;

            IndexReader reader = IndexReader.Open(dir, true);

            try
            {
                // look into sub readers and check if raw merge is on/off
                var readers = new System.Collections.Generic.List <IndexReader>();
                ReaderUtil.GatherSubReaders(readers, reader);
                foreach (IndexReader ir in readers)
                {
                    FieldsReader fr = ((SegmentReader)ir).GetFieldsReader();
                    Assert.IsTrue(shouldStillBeCompressed != fr.CanReadRawDocs(),
                                  "for a 2.9 index, FieldsReader.canReadRawDocs() must be false and other way round for a trunk index");
                }

                // test that decompression works correctly
                for (int i = 0; i < reader.MaxDoc; i++)
                {
                    if (!reader.IsDeleted(i))
                    {
                        Document d = reader.Document(i);
                        if (d.Get("content3") != null)
                        {
                            continue;
                        }
                        count++;
                        IFieldable compressed = d.GetFieldable("compressed");
                        if (int.Parse(d.Get("id")) % 2 == 0)
                        {
                            Assert.IsFalse(compressed.IsBinary);
                            Assert.AreEqual(TEXT_TO_COMPRESS, compressed.StringValue,
                                            "incorrectly decompressed string");
                        }
                        else
                        {
                            Assert.IsTrue(compressed.IsBinary);
                            Assert.IsTrue(BINARY_TO_COMPRESS.SequenceEqual(compressed.GetBinaryValue()),
                                          "incorrectly decompressed binary");
                        }
                    }
                }

                //check if field was decompressed after optimize
                for (int i = 0; i < reader.MaxDoc; i++)
                {
                    if (!reader.IsDeleted(i))
                    {
                        Document d = reader.Document(i, new AnonymousFieldSelector());
                        if (d.Get("content3") != null)
                        {
                            continue;
                        }
                        count++;
                        // read the size from the binary value using BinaryReader (this prevents us from doing the shift ops ourselves):
                        // ugh, Java uses Big-Endian streams, so we need to do it manually.
                        byte[] encodedSize    = d.GetFieldable("compressed").GetBinaryValue().Take(4).Reverse().ToArray();
                        int    actualSize     = BitConverter.ToInt32(encodedSize, 0);
                        int    compressedSize = int.Parse(d.Get("compressedSize"));
                        bool   binary         = int.Parse(d.Get("id")) % 2 > 0;
                        int    shouldSize     = shouldStillBeCompressed
                                             ? compressedSize
                                             : (binary ? BINARY_PLAIN_LENGTH : TEXT_PLAIN_LENGTH);
                        Assert.AreEqual(shouldSize, actualSize, "size incorrect");
                        if (!shouldStillBeCompressed)
                        {
                            Assert.IsFalse(compressedSize == actualSize,
                                           "uncompressed field should have another size than recorded in index");
                        }
                    }
                }
                Assert.AreEqual(34 * 2, count, "correct number of tests");
            }
            finally
            {
                reader.Dispose();
            }
        }
Example #24
        public virtual void  searchIndex(System.String dirName, System.String oldName)
        {
            //QueryParser parser = new QueryParser("contents", new WhitespaceAnalyzer());
            //Query query = parser.parse("handle:1");

            dirName = FullDir(dirName);

            Directory     dir      = FSDirectory.Open(new System.IO.DirectoryInfo(dirName));
            IndexSearcher searcher = new IndexSearcher(dir, true);
            IndexReader   reader   = searcher.IndexReader;

            _TestUtil.CheckIndex(dir);

            for (int i = 0; i < 35; i++)
            {
                if (!reader.IsDeleted(i))
                {
                    Document d      = reader.Document(i);
                    var      fields = d.GetFields();
                    if (!oldName.StartsWith("19.") && !oldName.StartsWith("20.") && !oldName.StartsWith("21.") && !oldName.StartsWith("22."))
                    {
                        if (d.GetField("content3") == null)
                        {
                            int numFields = oldName.StartsWith("29.") ? 7 : 5;
                            Assert.AreEqual(numFields, fields.Count);
                            Field f = d.GetField("id");
                            Assert.AreEqual("" + i, f.StringValue);

                            f = (Field)d.GetField("utf8");
                            Assert.AreEqual("Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", f.StringValue);

                            f = (Field)d.GetField("autf8");
                            Assert.AreEqual("Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", f.StringValue);

                            f = (Field)d.GetField("content2");
                            Assert.AreEqual("here is more content with aaa aaa aaa", f.StringValue);

                            f = (Field)d.GetField("fie\u2C77ld");
                            Assert.AreEqual("field with non-ascii name", f.StringValue);
                        }
                    }
                }
                // Only ID 7 is deleted
                else
                {
                    Assert.AreEqual(7, i);
                }
            }

            ScoreDoc[] hits = searcher.Search(new TermQuery(new Term("content", "aaa")), null, 1000).ScoreDocs;

            // First document should be #21 since its norm was
            // increased:
            Document d2 = searcher.Doc(hits[0].Doc);

            Assert.AreEqual("21", d2.Get("id"), "didn't get the right document first");

            TestHits(hits, 34, searcher.IndexReader);

            if (!oldName.StartsWith("19.") && !oldName.StartsWith("20.") && !oldName.StartsWith("21.") && !oldName.StartsWith("22."))
            {
                // Test on indices >= 2.3
                hits = searcher.Search(new TermQuery(new Term("utf8", "\u0000")), null, 1000).ScoreDocs;
                Assert.AreEqual(34, hits.Length);
                hits = searcher.Search(new TermQuery(new Term("utf8", "Lu\uD834\uDD1Ece\uD834\uDD60ne")), null, 1000).ScoreDocs;
                Assert.AreEqual(34, hits.Length);
                hits = searcher.Search(new TermQuery(new Term("utf8", "ab\ud917\udc17cd")), null, 1000).ScoreDocs;
                Assert.AreEqual(34, hits.Length);
            }

            searcher.Close();
            dir.Close();
        }
Example #25
        private static void FillCache(IndexSearcherHolder.IndexSearcherHoldingState state, List<string> fieldsToRead,IndexReader reader)
        {
            foreach (var field in fieldsToRead)
            {
                using (var termDocs = reader.TermDocs())
                {
                    using (var termEnum = reader.Terms(new Term(field)))
                    {
                        do
                        {
                            if (termEnum.Term == null || field != termEnum.Term.Field)
                                break;

                            if (LowPrecisionNumber(termEnum.Term))
                                continue;


                            var totalDocCountIncludedDeletes = termEnum.DocFreq();
                            termDocs.Seek(termEnum.Term);

                            while (termDocs.Next() && totalDocCountIncludedDeletes > 0)
                            {
                                totalDocCountIncludedDeletes -= 1;
                                if (reader.IsDeleted(termDocs.Doc))
                                    continue;

                                state.SetInCache(field, termDocs.Doc, termEnum.Term);
                            }
                        } while (termEnum.Next());
                    }
                }
            }
        }
Example #26
        /// <summary> Merge the TermVectors from each of the segments into the new one.</summary>
        /// <throws>  IOException </throws>
        private void MergeVectors()
        {
            TermVectorsWriter termVectorsWriter =
                new TermVectorsWriter(directory, segment, fieldInfos);

            try
            {
                for (int r = 0; r < readers.Count; r++)
                {
                    SegmentReader     matchingSegmentReader = matchingSegmentReaders[r];
                    TermVectorsReader matchingVectorsReader;
                    bool hasMatchingReader;
                    if (matchingSegmentReader != null)
                    {
                        matchingVectorsReader = matchingSegmentReader.termVectorsReaderOrig;

                        // If the TV* files are an older format then they
                        // cannot read raw docs:
                        if (matchingVectorsReader != null && !matchingVectorsReader.CanReadRawDocs())
                        {
                            matchingVectorsReader = null;
                            hasMatchingReader     = false;
                        }
                        else
                        {
                            hasMatchingReader = matchingVectorsReader != null;
                        }
                    }
                    else
                    {
                        hasMatchingReader     = false;
                        matchingVectorsReader = null;
                    }
                    IndexReader reader       = (IndexReader)readers[r];
                    bool        hasDeletions = reader.HasDeletions();
                    int         maxDoc       = reader.MaxDoc();
                    for (int docNum = 0; docNum < maxDoc;)
                    {
                        // skip deleted docs
                        if (!hasDeletions || !reader.IsDeleted(docNum))
                        {
                            if (hasMatchingReader)
                            {
                                // We can optimize this case (doing a bulk
                                // byte copy) since the field numbers are
                                // identical
                                int start   = docNum;
                                int numDocs = 0;
                                do
                                {
                                    docNum++;
                                    numDocs++;
                                    if (docNum >= maxDoc)
                                    {
                                        break;
                                    }
                                    if (hasDeletions && matchingSegmentReader.IsDeleted(docNum))
                                    {
                                        docNum++;
                                        break;
                                    }
                                } while (numDocs < MAX_RAW_MERGE_DOCS);

                                matchingVectorsReader.RawDocs(rawDocLengths, rawDocLengths2, start, numDocs);
                                termVectorsWriter.AddRawDocuments(matchingVectorsReader, rawDocLengths, rawDocLengths2, numDocs);
                                if (checkAbort != null)
                                {
                                    checkAbort.Work(300 * numDocs);
                                }
                            }
                            else
                            {
                                // NOTE: it's very important to first assign
                                // to vectors then pass it to
                                // termVectorsWriter.addAllDocVectors; see
                                // LUCENE-1282
                                TermFreqVector[] vectors = reader.GetTermFreqVectors(docNum);
                                termVectorsWriter.AddAllDocVectors(vectors);
                                docNum++;
                                if (checkAbort != null)
                                {
                                    checkAbort.Work(300);
                                }
                            }
                        }
                        else
                        {
                            docNum++;
                        }
                    }
                }
            }
            finally
            {
                termVectorsWriter.Close();
            }

            long tvxSize = directory.FileLength(segment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION);

            // {{dougsale-2.4.0}
            // this shouldn't be a problem for us - if it is,
            // then it's not a JRE bug
            //if (4 + mergedDocs * 16 != tvxSize)
            //  // This is most likely a bug in Sun JRE 1.6.0_04/_05;
            //  // we detect that the bug has struck, here, and
            //  // throw an exception to prevent the corruption from
            //  // entering the index.  See LUCENE-1282 for
            //  // details.
            //  throw new RuntimeException("mergeVectors produced an invalid result: mergedDocs is " + mergedDocs + " but tvx size is " + tvxSize + "; now aborting this merge to prevent index corruption");
        }
Example #27
		/// <summary> 1. Get a norm from the original reader 2. Clone the original reader 3.
		/// Delete a document and set the norm of the cloned reader 4. Verify the norms
		/// are not the same on each reader 5. Verify the doc deleted is only in the
		/// cloned reader 6. Try to delete a document in the original reader, an
		/// exception should be thrown
		/// 
		/// </summary>
		/// <param name="r1">IndexReader to perform tests on
		/// </param>
		/// <throws>  Exception </throws>
		private void  PerformDefaultTests(IndexReader r1)
		{
			float norm1 = Similarity.DecodeNorm(r1.Norms("field1")[4]);
			
			IndexReader pr1Clone = (IndexReader) r1.Clone();
			pr1Clone.DeleteDocument(10);
			pr1Clone.SetNorm(4, "field1", 0.5f);
			Assert.IsTrue(Similarity.DecodeNorm(r1.Norms("field1")[4]) == norm1);
			Assert.IsTrue(Similarity.DecodeNorm(pr1Clone.Norms("field1")[4]) != norm1);
			
			Assert.IsTrue(!r1.IsDeleted(10));
			Assert.IsTrue(pr1Clone.IsDeleted(10));
			
			// try to update the original reader, which should throw an exception
            Assert.Throws<LockObtainFailedException>(() => r1.DeleteDocument(11),
		                             "Tried to delete doc 11 and an exception should have been thrown");
			pr1Clone.Close();
		}
Example #28
		/// <summary> 1. Get a norm from the original reader 2. Clone the original reader 3.
		/// Delete a document and set the norm of the cloned reader 4. Verify the norms
		/// are not the same on each reader 5. Verify the doc deleted is only in the
		/// cloned reader 6. Try to delete a document in the original reader, an
		/// exception should be thrown
		/// 
		/// </summary>
		/// <param name="r1">IndexReader to perform tests on
		/// </param>
		/// <throws>  Exception </throws>
		private void  PerformDefaultTests(IndexReader r1)
		{
			float norm1 = Similarity.DecodeNorm(r1.Norms("field1")[4]);
			
			IndexReader pr1Clone = (IndexReader) r1.Clone();
			pr1Clone.DeleteDocument(10);
			pr1Clone.SetNorm(4, "field1", 0.5f);
			Assert.IsTrue(Similarity.DecodeNorm(r1.Norms("field1")[4]) == norm1);
			Assert.IsTrue(Similarity.DecodeNorm(pr1Clone.Norms("field1")[4]) != norm1);
			
			Assert.IsTrue(!r1.IsDeleted(10));
			Assert.IsTrue(pr1Clone.IsDeleted(10));
			
			// try to update the original reader, which should throw an exception
			try
			{
				r1.DeleteDocument(11);
				Assert.Fail("Tried to delete doc 11 and an exception should have been thrown");
			}
			catch (System.Exception exception)
			{
				// expected
			}
			pr1Clone.Close();
		}
Example #29
        private static void FillCache(IndexSearcherHolder.IndexSearcherHoldingState state, IEnumerable<string> fieldsToRead,IndexReader reader)
        {
            foreach (var field in fieldsToRead)
            {
	            var items = new LinkedList<IndexSearcherHolder.IndexSearcherHoldingState.CacheVal>[reader.MaxDoc];
                using (var termDocs = reader.TermDocs())
                {
                    using (var termEnum = reader.Terms(new Term(field)))
                    {
                        do
                        {
                            if (termEnum.Term == null || field != termEnum.Term.Field)
                                break;

                            Term term = termEnum.Term;
                            if (LowPrecisionNumber(term.Field, term.Text))
                                continue;


                            var totalDocCountIncludedDeletes = termEnum.DocFreq();
                            termDocs.Seek(termEnum.Term);

                            while (termDocs.Next() && totalDocCountIncludedDeletes > 0)
                            {
                                totalDocCountIncludedDeletes -= 1;
                                if (reader.IsDeleted(termDocs.Doc))
                                    continue;
								if(items[termDocs.Doc] == null)
									items[termDocs.Doc] = new LinkedList<IndexSearcherHolder.IndexSearcherHoldingState.CacheVal>();

	                            items[termDocs.Doc].AddLast(new IndexSearcherHolder.IndexSearcherHoldingState.CacheVal
	                            {
		                            Term = termEnum.Term
	                            });
                            }
                        } while (termEnum.Next());
                    }
                }
	            state.SetInCache(field, items);
            }
        }
Example #30
 public override bool IsDeleted(int n)
 {
     return(in_Renamed.IsDeleted(n));
 }
Example #31
        /// <summary> </summary>
        /// <returns> The number of documents in all of the readers
        /// </returns>
        /// <throws>  CorruptIndexException if the index is corrupt </throws>
        /// <throws>  IOException if there is a low-level IO error </throws>
        private int MergeFields()
        {
            if (!mergeDocStores)
            {
                // When we are not merging by doc stores, that means
                // all segments were written as part of a single
                // autoCommit=false IndexWriter session, so their field
                // name -> number mapping are the same.  So, we start
                // with the fieldInfos of the last segment in this
                // case, to keep that numbering.
                SegmentReader sr = (SegmentReader)readers[readers.Count - 1];
                fieldInfos = (FieldInfos)sr.fieldInfos.Clone();
            }
            else
            {
                fieldInfos = new FieldInfos();                 // merge field names
            }

            for (int i = 0; i < readers.Count; i++)
            {
                IndexReader reader = (IndexReader)readers[i];
                if (reader is SegmentReader)
                {
                    SegmentReader segmentReader = (SegmentReader)reader;
                    for (int j = 0; j < segmentReader.GetFieldInfos().Size(); j++)
                    {
                        FieldInfo fi = segmentReader.GetFieldInfos().FieldInfo(j);
                        fieldInfos.Add(fi.name, fi.isIndexed, fi.storeTermVector, fi.storePositionWithTermVector, fi.storeOffsetWithTermVector, !reader.HasNorms(fi.name), fi.storePayloads);
                    }
                }
                else
                {
                    AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET), true, true, true, false);
                    AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION), true, true, false, false);
                    AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET), true, false, true, false);
                    AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR), true, false, false, false);
                    AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.STORES_PAYLOADS), false, false, false, true);
                    AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.INDEXED), false, false, false, false);
                    fieldInfos.Add(reader.GetFieldNames(IndexReader.FieldOption.UNINDEXED), false);
                }
            }
            fieldInfos.Write(directory, segment + ".fnm");

            int docCount = 0;

            if (mergeDocStores)
            {
                // If the i'th reader is a SegmentReader and has
                // identical fieldName -> number mapping, then this
                // array will be non-null at position i:
                SegmentReader[] matchingSegmentReaders = new SegmentReader[readers.Count];

                // If this reader is a SegmentReader, and all of its
                // field name -> number mappings match the "merged"
                // FieldInfos, then we can do a bulk copy of the
                // stored fields:
                for (int i = 0; i < readers.Count; i++)
                {
                    IndexReader reader = (IndexReader)readers[i];
                    if (reader is SegmentReader)
                    {
                        SegmentReader segmentReader     = (SegmentReader)reader;
                        bool          same              = true;
                        FieldInfos    segmentFieldInfos = segmentReader.GetFieldInfos();
                        for (int j = 0; same && j < segmentFieldInfos.Size(); j++)
                        {
                            same = fieldInfos.FieldName(j).Equals(segmentFieldInfos.FieldName(j));
                        }
                        if (same)
                        {
                            matchingSegmentReaders[i] = segmentReader;
                        }
                    }
                }

                // Used for bulk-reading raw bytes for stored fields
                int[] rawDocLengths = new int[MAX_RAW_MERGE_DOCS];

                // for merging we don't want to compress/uncompress the data, so to tell the FieldsReader that we're
                // in  merge mode, we use this FieldSelector
                FieldSelector fieldSelectorMerge = new AnonymousClassFieldSelector(this);

                // merge field values
                FieldsWriter fieldsWriter = new FieldsWriter(directory, segment, fieldInfos);

                try
                {
                    for (int i = 0; i < readers.Count; i++)
                    {
                        IndexReader   reader = (IndexReader)readers[i];
                        SegmentReader matchingSegmentReader = matchingSegmentReaders[i];
                        FieldsReader  matchingFieldsReader;
                        if (matchingSegmentReader != null)
                        {
                            matchingFieldsReader = matchingSegmentReader.GetFieldsReader();
                        }
                        else
                        {
                            matchingFieldsReader = null;
                        }
                        int maxDoc = reader.MaxDoc();
                        for (int j = 0; j < maxDoc;)
                        {
                            if (!reader.IsDeleted(j))
                            {
                                // skip deleted docs
                                if (matchingSegmentReader != null)
                                {
                                    // We can optimize this case (doing a bulk
                                    // byte copy) since the field numbers are
                                    // identical
                                    int start   = j;
                                    int numDocs = 0;
                                    do
                                    {
                                        j++;
                                        numDocs++;
                                    }while (j < maxDoc && !matchingSegmentReader.IsDeleted(j) && numDocs < MAX_RAW_MERGE_DOCS);

                                    IndexInput stream = matchingFieldsReader.RawDocs(rawDocLengths, start, numDocs);
                                    fieldsWriter.AddRawDocuments(stream, rawDocLengths, numDocs);
                                    docCount += numDocs;
                                    if (checkAbort != null)
                                    {
                                        checkAbort.Work(300 * numDocs);
                                    }
                                }
                                else
                                {
                                    fieldsWriter.AddDocument(reader.Document(j, fieldSelectorMerge));
                                    j++;
                                    docCount++;
                                    if (checkAbort != null)
                                    {
                                        checkAbort.Work(300);
                                    }
                                }
                            }
                            else
                            {
                                j++;
                            }
                        }
                    }
                }
                finally
                {
                    fieldsWriter.Close();
                }
            }
            // If we are skipping the doc stores, that means there
            // are no deletions in any of these segments, so we
            // just sum numDocs() of each segment to get total docCount
            else
            {
                for (int i = 0; i < readers.Count; i++)
                {
                    docCount += ((IndexReader)readers[i]).NumDocs();
                }
            }

            return(docCount);
        }
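
For reference, the per-document fallback branch in the loop above reduces to the following minimal sketch. This is not part of the original source; `reader` and `fieldsWriter` are placeholder names, and only the MaxDoc()/IsDeleted()/Document()/AddDocument() calls already shown above are assumed:

        // Minimal sketch (placeholder names): copy only the live (non-deleted) documents.
        private static int CopyLiveDocuments(IndexReader reader, FieldsWriter fieldsWriter)
        {
            int copied = 0;
            int maxDoc = reader.MaxDoc();
            for (int j = 0; j < maxDoc; j++)
            {
                if (reader.IsDeleted(j))
                {
                    // skip deleted docs
                    continue;
                }
                fieldsWriter.AddDocument(reader.Document(j));
                copied++;
            }
            return copied;
        }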
Example #32
0
		public static void  AssertIndexEquals(IndexReader index1, IndexReader index2)
		{
			Assert.AreEqual(index1.NumDocs(), index2.NumDocs(), "IndexReaders have different values for numDocs.");
			Assert.AreEqual(index1.MaxDoc, index2.MaxDoc, "IndexReaders have different values for maxDoc.");
			Assert.AreEqual(index1.HasDeletions, index2.HasDeletions, "Only one IndexReader has deletions.");
			Assert.AreEqual(index1.IsOptimized(), index2.IsOptimized(), "Only one index is optimized.");
			
			// check field names
			System.Collections.Generic.ICollection<string> fieldsNames1 = index1.GetFieldNames(FieldOption.ALL);
			System.Collections.Generic.ICollection<string> fieldsNames2 = index2.GetFieldNames(FieldOption.ALL);

            System.Collections.Generic.ICollection<IFieldable> fields1 = null;
            System.Collections.Generic.ICollection<IFieldable> fields2 = null;

            Assert.AreEqual(fieldsNames1.Count, fieldsNames2.Count, "IndexReaders have different numbers of fields.");
            System.Collections.IEnumerator it1 = fieldsNames1.GetEnumerator();
            System.Collections.IEnumerator it2 = fieldsNames2.GetEnumerator();
			while (it1.MoveNext() && it2.MoveNext())
			{
				Assert.AreEqual((System.String) it1.Current, (System.String) it2.Current, "Different field names.");
			}
			
			// check norms
            it1 = fieldsNames1.GetEnumerator();
			while (it1.MoveNext())
			{
				System.String curField = (System.String) it1.Current;
				byte[] norms1 = index1.Norms(curField);
				byte[] norms2 = index2.Norms(curField);
				if (norms1 != null && norms2 != null)
				{
					Assert.AreEqual(norms1.Length, norms2.Length);
					for (int i = 0; i < norms1.Length; i++)
					{
						Assert.AreEqual(norms1[i], norms2[i], "Norm different for doc " + i + " and field '" + curField + "'.");
					}
				}
				else
				{
					Assert.AreSame(norms1, norms2);
				}
			}
			
			// check deletions
			for (int i = 0; i < index1.MaxDoc; i++)
			{
				Assert.AreEqual(index1.IsDeleted(i), index2.IsDeleted(i), "Doc " + i + " only deleted in one index.");
			}
			
			// check stored fields
			for (int i = 0; i < index1.MaxDoc; i++)
			{
				if (!index1.IsDeleted(i))
				{
					Document doc1 = index1.Document(i);
					Document doc2 = index2.Document(i);
					fields1 = doc1.GetFields();
					fields2 = doc2.GetFields();
					Assert.AreEqual(fields1.Count, fields2.Count, "Different numbers of fields for doc " + i + ".");
					it1 = fields1.GetEnumerator();
					it2 = fields2.GetEnumerator();
					while (it1.MoveNext() && it2.MoveNext())
					{
						Field curField1 = (Field) it1.Current;
						Field curField2 = (Field) it2.Current;
						Assert.AreEqual(curField1.Name, curField2.Name, "Different fields names for doc " + i + ".");
						Assert.AreEqual(curField1.StringValue, curField2.StringValue, "Different field values for doc " + i + ".");
					}
				}
			}
			
			// check dictionary and posting lists
			TermEnum enum1 = index1.Terms();
			TermEnum enum2 = index2.Terms();
			TermPositions tp1 = index1.TermPositions();
			TermPositions tp2 = index2.TermPositions();
			while (enum1.Next())
			{
				Assert.IsTrue(enum2.Next());
				Assert.AreEqual(enum1.Term, enum2.Term, "Different term in dictionary.");
				tp1.Seek(enum1.Term);
				tp2.Seek(enum1.Term);
				while (tp1.Next())
				{
					Assert.IsTrue(tp2.Next());
					Assert.AreEqual(tp1.Doc, tp2.Doc, "Different doc id in postinglist of term " + enum1.Term + ".");
					Assert.AreEqual(tp1.Freq, tp2.Freq, "Different term frequence in postinglist of term " + enum1.Term + ".");
					for (int i = 0; i < tp1.Freq; i++)
					{
						Assert.AreEqual(tp1.NextPosition(), tp2.NextPosition(), "Different positions in postinglist of term " + enum1.Term + ".");
					}
				}
			}
		}
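
The per-document deletion and stored-field checks above rely on the invariant that a reader's live document count equals MaxDoc minus the number of documents for which IsDeleted() returns true. A minimal sketch of that relationship (an illustration added here, not part of the original test; the helper name is hypothetical):

		// Hedged sketch: count documents flagged as deleted in a reader.
		private static int CountDeletedDocs(IndexReader reader)
		{
			int deleted = 0;
			for (int i = 0; i < reader.MaxDoc; i++)
			{
				if (reader.IsDeleted(i))
				{
					deleted++;
				}
			}
			return deleted;
		}

		// Usage: NumDocs() should equal MaxDoc minus the deleted count.
		// Assert.AreEqual(index1.NumDocs(), index1.MaxDoc - CountDeletedDocs(index1));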
Example #33
0
        /// <summary>Merges the stored field values of all readers into the target segment.</summary>
        /// <returns> The number of documents in all of the readers
        /// </returns>
        /// <throws>  CorruptIndexException if the index is corrupt </throws>
        /// <throws>  IOException if there is a low-level IO error </throws>
        private int MergeFields()
        {
            if (!mergeDocStores)
            {
                // When we are not merging by doc stores, that means
                // all segments were written as part of a single
                // autoCommit=false IndexWriter session, so their field
                // name -> number mappings are the same.  So, we start
                // with the fieldInfos of the last segment in this
                // case, to keep that numbering.
                SegmentReader sr = (SegmentReader)readers[readers.Count - 1];
                fieldInfos = (FieldInfos)sr.fieldInfos.Clone();
            }
            else
            {
                fieldInfos = new FieldInfos();                 // merge field names
            }

            for (int i = 0; i < readers.Count; i++)
            {
                IndexReader reader = (IndexReader)readers[i];
                if (reader is SegmentReader)
                {
                    SegmentReader segmentReader = (SegmentReader)reader;
                    for (int j = 0; j < segmentReader.GetFieldInfos().Size(); j++)
                    {
                        FieldInfo fi = segmentReader.GetFieldInfos().FieldInfo(j);
                        fieldInfos.Add(fi.name, fi.isIndexed, fi.storeTermVector, fi.storePositionWithTermVector, fi.storeOffsetWithTermVector, !reader.HasNorms(fi.name), fi.storePayloads, fi.omitTf);
                    }
                }
                else
                {
                    AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET), true, true, true, false, false);
                    AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION), true, true, false, false, false);
                    AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET), true, false, true, false, false);
                    AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR), true, false, false, false, false);
                    AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.OMIT_TF), false, false, false, false, true);
                    AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.STORES_PAYLOADS), false, false, false, true, false);
                    AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.INDEXED), false, false, false, false, false);
                    fieldInfos.Add(reader.GetFieldNames(IndexReader.FieldOption.UNINDEXED), false);
                }
            }
            fieldInfos.Write(directory, segment + ".fnm");

            int docCount = 0;

            // Identify which readers' FieldInfos line up with the merged FieldInfos,
            // so that their stored fields can be bulk-copied below.
            SetMatchingSegmentReaders();

            if (mergeDocStores)
            {
                // for merging we don't want to compress/uncompress the data, so to tell the FieldsReader that we're
                // in  merge mode, we use this FieldSelector
                FieldSelector fieldSelectorMerge = new AnonymousClassFieldSelector(this);

                // merge field values
                FieldsWriter fieldsWriter = new FieldsWriter(directory, segment, fieldInfos);

                try
                {
                    for (int i = 0; i < readers.Count; i++)
                    {
                        IndexReader   reader = (IndexReader)readers[i];
                        SegmentReader matchingSegmentReader = matchingSegmentReaders[i];
                        FieldsReader  matchingFieldsReader;
                        bool          hasMatchingReader;
                        if (matchingSegmentReader != null)
                        {
                            FieldsReader fieldsReader = matchingSegmentReader.GetFieldsReader();
                            if (fieldsReader != null && !fieldsReader.CanReadRawDocs())
                            {
                                matchingFieldsReader = null;
                                hasMatchingReader    = false;
                            }
                            else
                            {
                                matchingFieldsReader = fieldsReader;
                                hasMatchingReader    = true;
                            }
                        }
                        else
                        {
                            hasMatchingReader    = false;
                            matchingFieldsReader = null;
                        }
                        int  maxDoc       = reader.MaxDoc();
                        bool hasDeletions = reader.HasDeletions();
                        for (int j = 0; j < maxDoc;)
                        {
                            if (!hasDeletions || !reader.IsDeleted(j))
                            { // skip deleted docs
                                if (hasMatchingReader)
                                {
                                    // We can optimize this case (doing a bulk
                                    // byte copy) since the field numbers are
                                    // identical
                                    int start   = j;
                                    int numDocs = 0;
                                    do
                                    {
                                        j++;
                                        numDocs++;
                                        if (j >= maxDoc)
                                        {
                                            break;
                                        }
                                        if (hasDeletions && matchingSegmentReader.IsDeleted(j))
                                        {
                                            j++;
                                            break;
                                        }
                                    } while (numDocs < MAX_RAW_MERGE_DOCS);

                                    IndexInput stream = matchingFieldsReader.RawDocs(rawDocLengths, start, numDocs);
                                    fieldsWriter.AddRawDocuments(stream, rawDocLengths, numDocs);
                                    docCount += numDocs;
                                    if (checkAbort != null)
                                    {
                                        checkAbort.Work(300 * numDocs);
                                    }
                                }
                                else
                                {
                                    // NOTE: it's very important to first assign
                                    // to doc then pass it to
                                    // termVectorsWriter.addAllDocVectors; see
                                    // LUCENE-1282
                                    Document doc = reader.Document(j, fieldSelectorMerge);
                                    fieldsWriter.AddDocument(doc);
                                    j++;
                                    docCount++;
                                    if (checkAbort != null)
                                    {
                                        checkAbort.Work(300);
                                    }
                                }
                            }
                            else
                            {
                                j++;
                            }
                        }
                    }
                }
                finally
                {
                    fieldsWriter.Close();
                }

                long fdxFileLength = directory.FileLength(segment + "." + IndexFileNames.FIELDS_INDEX_EXTENSION);

                // {{dougsale-2.4.0}
                // this shouldn't be a problem for us - if it is,
                // then it's not a JRE bug...
                //if (4+docCount*8 != fdxFileLength)
                //  // This is most likely a bug in Sun JRE 1.6.0_04/_05;
                //  // we detect that the bug has struck, here, and
                //  // throw an exception to prevent the corruption from
                //  // entering the index.  See LUCENE-1282 for
                //  // details.
                //  throw new RuntimeException("mergeFields produced an invalid result: docCount is " + docCount + " but fdx file size is " + fdxFileLength + "; now aborting this merge to prevent index corruption");
            }
            else
            {
                // If we are skipping the doc stores, that means there
                // are no deletions in any of these segments, so we
                // just sum numDocs() of each segment to get total docCount
                for (int i = 0; i < readers.Count; i++)
                {
                    docCount += ((IndexReader)readers[i]).NumDocs();
                }
            }

            return(docCount);
        }
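
The fast path in the final else branch is valid only because segments whose doc stores are being skipped are stated above to have no deletions, so each reader's NumDocs() already equals its MaxDoc() and IsDeleted() never needs to be consulted. A hedged sketch of that precondition (the helper is a placeholder, not part of the original source):

        // Sketch: sum live doc counts, asserting the no-deletions precondition per reader.
        private static int SumLiveDocCounts(System.Collections.IList readers)
        {
            int total = 0;
            for (int i = 0; i < readers.Count; i++)
            {
                IndexReader reader = (IndexReader) readers[i];
                // Precondition of the fast path: no deletions, so NumDocs() == MaxDoc().
                System.Diagnostics.Debug.Assert(!reader.HasDeletions());
                total += reader.NumDocs();
            }
            return total;
        }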