Example #1
        public virtual void  TestLazyFieldsAfterClose()
        {
            Assert.IsTrue(dir != null);
            Assert.IsTrue(fieldInfos != null);
            FieldsReader reader = new FieldsReader(dir, TEST_SEGMENT_NAME, fieldInfos, null);

            Assert.IsTrue(reader != null);
            Assert.IsTrue(reader.Size() == 1);
            ISet<string> loadFieldNames = Support.Compatibility.SetFactory.CreateHashSet<string>();

            loadFieldNames.Add(DocHelper.TEXT_FIELD_1_KEY);
            loadFieldNames.Add(DocHelper.TEXT_FIELD_UTF1_KEY);
            ISet<string> lazyFieldNames = Support.Compatibility.SetFactory.CreateHashSet<string>();

            lazyFieldNames.Add(DocHelper.LARGE_LAZY_FIELD_KEY);
            lazyFieldNames.Add(DocHelper.LAZY_FIELD_KEY);
            lazyFieldNames.Add(DocHelper.LAZY_FIELD_BINARY_KEY);
            lazyFieldNames.Add(DocHelper.TEXT_FIELD_UTF2_KEY);
            SetBasedFieldSelector fieldSelector = new SetBasedFieldSelector(loadFieldNames, lazyFieldNames);
            Document doc = reader.Doc(0, fieldSelector, null);

            Assert.IsTrue(doc != null, "doc is null and it shouldn't be");
            IFieldable field = doc.GetFieldable(DocHelper.LAZY_FIELD_KEY);

            Assert.IsTrue(field != null, "field is null and it shouldn't be");
            Assert.IsTrue(field.IsLazy, "field is not lazy and it should be");
            reader.Dispose();

            Assert.Throws <AlreadyClosedException>(() => { var value = field.StringValue(null); },
                                                   "did not hit AlreadyClosedException as expected");
        }
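The SetBasedFieldSelector used above maps each field name to one of three outcomes: load now, load lazily, or skip. A minimal custom selector with the same shape — a sketch only, assuming the 2.9-era Lucene.Net FieldSelector interface and the FieldSelectorResult constants these examples already use — would look like this:

        using System.Collections.Generic;
        using Lucene.Net.Documents;

        // Sketch of a custom selector (hypothetical class, same contract as SetBasedFieldSelector).
        class NameBasedSelector : FieldSelector
        {
            private readonly ISet<string> eager;
            private readonly ISet<string> lazy;

            public NameBasedSelector(ISet<string> eager, ISet<string> lazy)
            {
                this.eager = eager;
                this.lazy = lazy;
            }

            public FieldSelectorResult Accept(string fieldName)
            {
                if (eager.Contains(fieldName))
                    return FieldSelectorResult.LOAD;      // read the stored value immediately
                if (lazy.Contains(fieldName))
                    return FieldSelectorResult.LAZY_LOAD; // defer reading until the value is asked for
                return FieldSelectorResult.NO_LOAD;       // skip the stored value entirely
            }
        }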
Example #2
        private void Initialize(SegmentInfo si)
        {
            segment = si.name;

            // Use compound file directory for some files, if it exists
            Directory cfsDir = Directory();
            if (Directory().FileExists(segment + ".cfs"))
            {
                cfsReader = new CompoundFileReader(Directory(), segment + ".cfs");
                cfsDir = cfsReader;
            }

            // Open the per-segment files (through the compound-file directory when one exists, otherwise the plain multi-file format)
            fieldInfos = new FieldInfos(cfsDir, segment + ".fnm");
            fieldsReader = new FieldsReader(cfsDir, segment, fieldInfos);

            tis = new TermInfosReader(cfsDir, segment, fieldInfos);

            // NOTE: the bitvector is stored using the regular directory, not cfs
            if (HasDeletions(si))
                deletedDocs = new BitVector(Directory(), segment + ".del");

            // make sure that all index files have been read or are kept open
            // so that if an index update removes them we'll still have them
            freqStream = cfsDir.OpenInput(segment + ".frq");
            proxStream = cfsDir.OpenInput(segment + ".prx");
            OpenNorms(cfsDir);

            if (fieldInfos.HasVectors())
            {
                // open term vector files only as needed
                termVectorsReaderOrig = new TermVectorsReader(cfsDir, segment, fieldInfos);
            }
        }
        public virtual void  TestLoadFirst()
        {
            Assert.IsTrue(dir != null);
            Assert.IsTrue(fieldInfos != null);
            FieldsReader reader = new FieldsReader(dir, TEST_SEGMENT_NAME, fieldInfos);

            Assert.IsTrue(reader != null);
            Assert.IsTrue(reader.Size() == 1);
            LoadFirstFieldSelector fieldSelector = new LoadFirstFieldSelector();
            Document doc = reader.Doc(0, fieldSelector);

            Assert.IsTrue(doc != null, "doc is null and it shouldn't be");
            int count = 0;

            System.Collections.IList l = doc.GetFields();
            foreach (Field field in l)
            {
                Assert.IsTrue(field != null, "field is null and it shouldn't be");
                System.String sv = field.StringValue();
                Assert.IsTrue(sv != null, "sv is null and it shouldn't be");
                count++;
            }
            Assert.IsTrue(count == 1, count + " does not equal: " + 1);
        }
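LoadFirstFieldSelector, exercised above, is the degenerate selector: it loads the first stored field it is offered and tells the reader to stop, which is why the loop asserts that exactly one field came back. Its behavior amounts to this sketch (same assumed FieldSelector contract as in the earlier sketch):

        // Sketch: LOAD_AND_BREAK loads the current field's value, then stops reading further fields.
        class FirstFieldOnlySelector : FieldSelector
        {
            public FieldSelectorResult Accept(string fieldName)
            {
                return FieldSelectorResult.LOAD_AND_BREAK;
            }
        }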
Example #4
        public override void CheckIntegrity()
        {
            EnsureOpen();

            // stored fields
            FieldsReader.CheckIntegrity();

            // term vectors
            TermVectorsReader termVectorsReader = TermVectorsReader;
            if (termVectorsReader != null)
            {
                termVectorsReader.CheckIntegrity();
            }

            // terms/postings
            if (core.fields != null)
            {
                core.fields.CheckIntegrity();
            }

            // norms
            if (core.normsProducer != null)
            {
                core.normsProducer.CheckIntegrity();
            }

            // docvalues
            if (dvProducers != null)
            {
                foreach (DocValuesProducer producer in dvProducers)
                {
                    producer.CheckIntegrity();
                }
            }
        }
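This override belongs to a 4.x-era SegmentReader: each sub-component (stored fields, term vectors, postings, norms, doc values) verifies its own checksums. A hedged usage sketch, assuming the Lucene.Net 4.8 reader API (DirectoryReader.Open, AtomicReaderContext.AtomicReader) and a hypothetical index path:

        using Lucene.Net.Index;
        using Lucene.Net.Store;

        // Checksum-verify every segment of an index (sketch); CheckIntegrity
        // throws CorruptIndexException if any file fails verification.
        using (var dir = FSDirectory.Open("/path/to/index"))
        using (var reader = DirectoryReader.Open(dir))
        {
            foreach (AtomicReaderContext ctx in reader.Leaves)
            {
                ctx.AtomicReader.CheckIntegrity();
            }
        }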
Example #5
 public LazyField(FieldsReader enclosingInstance, System.String name, Field.Store store, Field.Index index, Field.TermVector termVector, int toRead, long pointer) : base(name, store, index, termVector)
 {
     InitBlock(enclosingInstance);
     this.toRead  = toRead;
     this.pointer = pointer;
     lazy         = true;
 }
Example #6
        public virtual void  TestLazyFields()
        {
            Assert.IsTrue(dir != null);
            Assert.IsTrue(fieldInfos != null);
            FieldsReader reader = new FieldsReader(dir, TEST_SEGMENT_NAME, fieldInfos, null);

            Assert.IsTrue(reader != null);
            Assert.IsTrue(reader.Size() == 1);
            ISet<string> loadFieldNames = Support.Compatibility.SetFactory.CreateHashSet<string>();

            loadFieldNames.Add(DocHelper.TEXT_FIELD_1_KEY);
            loadFieldNames.Add(DocHelper.TEXT_FIELD_UTF1_KEY);
            ISet<string> lazyFieldNames = Support.Compatibility.SetFactory.CreateHashSet<string>();

            lazyFieldNames.Add(DocHelper.LARGE_LAZY_FIELD_KEY);
            lazyFieldNames.Add(DocHelper.LAZY_FIELD_KEY);
            lazyFieldNames.Add(DocHelper.LAZY_FIELD_BINARY_KEY);
            lazyFieldNames.Add(DocHelper.TEXT_FIELD_UTF2_KEY);
            SetBasedFieldSelector fieldSelector = new SetBasedFieldSelector(loadFieldNames, lazyFieldNames);
            Document doc = reader.Doc(0, fieldSelector, null);

            Assert.IsTrue(doc != null, "doc is null and it shouldn't be");
            IFieldable field = doc.GetFieldable(DocHelper.LAZY_FIELD_KEY);

            Assert.IsTrue(field != null, "field is null and it shouldn't be");
            Assert.IsTrue(field.IsLazy, "field is not lazy and it should be");
            System.String value_Renamed = field.StringValue(null);
            Assert.IsTrue(value_Renamed != null, "value is null and it shouldn't be");
            Assert.IsTrue(value_Renamed.Equals(DocHelper.LAZY_FIELD_TEXT) == true, value_Renamed + " is not equal to " + DocHelper.LAZY_FIELD_TEXT);
            field = doc.GetFieldable(DocHelper.TEXT_FIELD_1_KEY);
            Assert.IsTrue(field != null, "field is null and it shouldn't be");
            Assert.IsTrue(field.IsLazy == false, "Field is lazy and it should not be");
            field = doc.GetFieldable(DocHelper.TEXT_FIELD_UTF1_KEY);
            Assert.IsTrue(field != null, "field is null and it shouldn't be");
            Assert.IsTrue(field.IsLazy == false, "Field is lazy and it should not be");
            Assert.IsTrue(field.StringValue(null).Equals(DocHelper.FIELD_UTF1_TEXT) == true, field.StringValue(null) + " is not equal to " + DocHelper.FIELD_UTF1_TEXT);

            field = doc.GetFieldable(DocHelper.TEXT_FIELD_UTF2_KEY);
            Assert.IsTrue(field != null, "field is null and it shouldn't be");
            Assert.IsTrue(field.IsLazy == true, "field is not lazy and it should be");
            Assert.IsTrue(field.StringValue(null).Equals(DocHelper.FIELD_UTF2_TEXT) == true, field.StringValue(null) + " is not equal to " + DocHelper.FIELD_UTF2_TEXT);

            field = doc.GetFieldable(DocHelper.LAZY_FIELD_BINARY_KEY);
            Assert.IsTrue(field != null, "field is null and it shouldn't be");
            Assert.IsTrue(field.StringValue(null) == null, "stringValue isn't null for lazy binary field");

            byte[] bytes = field.GetBinaryValue(null);
            Assert.IsTrue(bytes != null, "bytes is null and it shouldn't be");
            Assert.IsTrue(DocHelper.LAZY_FIELD_BINARY_BYTES.Length == bytes.Length, "byte array lengths differ");
            for (int i = 0; i < bytes.Length; i++)
            {
                Assert.IsTrue(bytes[i] == DocHelper.LAZY_FIELD_BINARY_BYTES[i], "byte[" + i + "] is mismatched");
            }
        }
Example #7
 public LazyField(FieldsReader enclosingInstance, System.String name, Field.Store store, Field.Index index, Field.TermVector termVector, int toRead, long pointer, bool isBinary) : base(name, store, index, termVector)
 {
     InitBlock(enclosingInstance);
     this.toRead   = toRead;
     this.pointer  = pointer;
     this.isBinary = isBinary;
     if (isBinary)
     {
         binaryLength = toRead;
     }
     lazy = true;
 }
 public LazyField(FieldsReader enclosingInstance, System.String name, Field.Store store, int toRead, long pointer, bool isBinary, bool isCompressed) : base(name, store, Field.Index.NO, Field.TermVector.NO)
 {
     InitBlock(enclosingInstance);
     this.toRead           = toRead;
     this.pointer          = pointer;
     this.internalIsBinary = isBinary;
     if (isBinary)
     {
         internalBinaryLength = toRead;
     }
     lazy = true;
     this.isCompressed = isCompressed;
 }
        public virtual void  TestLoadSize()
        {
            FieldsReader reader = new FieldsReader(dir, TEST_SEGMENT_NAME, fieldInfos);
            Document     doc;

            doc = reader.Doc(0, new AnonymousClassFieldSelector(this));
            Fieldable f1 = doc.GetFieldable(DocHelper.TEXT_FIELD_1_KEY);
            Fieldable f3 = doc.GetFieldable(DocHelper.TEXT_FIELD_3_KEY);
            Fieldable fb = doc.GetFieldable(DocHelper.LAZY_FIELD_BINARY_KEY);

            Assert.IsTrue(f1.IsBinary());
            Assert.IsTrue(!f3.IsBinary());
            Assert.IsTrue(fb.IsBinary());
            AssertSizeEquals(2 * DocHelper.FIELD_1_TEXT.Length, f1.BinaryValue());
            Assert.AreEqual(DocHelper.FIELD_3_TEXT, f3.StringValue());
            AssertSizeEquals(DocHelper.LAZY_FIELD_BINARY_BYTES.Length, fb.BinaryValue());

            reader.Close();
        }
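TestLoadSize depends on an AnonymousClassFieldSelector that is not shown in this listing. Judging from the assertions (f1 and fb come back binary and size-valued, while f3 keeps its text), a plausible reconstruction — hypothetical, modeled on the Java test this code ports — is:

        // Hypothetical reconstruction of the selector TestLoadSize uses: SIZE replaces
        // a field's value with its byte size (2 * char count for strings), which is why
        // f1 and fb assert IsBinary() while f3 is loaded normally.
        class SizeSelector : FieldSelector
        {
            public FieldSelectorResult Accept(string fieldName)
            {
                if (fieldName.Equals(DocHelper.TEXT_FIELD_1_KEY) ||
                    fieldName.Equals(DocHelper.LAZY_FIELD_BINARY_KEY))
                    return FieldSelectorResult.SIZE;
                if (fieldName.Equals(DocHelper.TEXT_FIELD_3_KEY))
                    return FieldSelectorResult.LOAD;
                return FieldSelectorResult.NO_LOAD;
            }
        }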
        public virtual void  Test()
        {
            Assert.IsTrue(dir != null);
            Assert.IsTrue(fieldInfos != null);
            FieldsReader reader = new FieldsReader(dir, TEST_SEGMENT_NAME, fieldInfos);

            Assert.IsTrue(reader != null);
            Assert.IsTrue(reader.Size() == 1);
            Document doc = reader.Doc(0, null);

            Assert.IsTrue(doc != null);
            Assert.IsTrue(doc.GetField(DocHelper.TEXT_FIELD_1_KEY) != null);

            Fieldable field = doc.GetField(DocHelper.TEXT_FIELD_2_KEY);

            Assert.IsTrue(field != null);
            Assert.IsTrue(field.IsTermVectorStored() == true);

            Assert.IsTrue(field.IsStoreOffsetWithTermVector() == true);
            Assert.IsTrue(field.IsStorePositionWithTermVector() == true);
            Assert.IsTrue(field.GetOmitNorms() == false);
            Assert.IsTrue(field.GetOmitTf() == false);

            field = doc.GetField(DocHelper.TEXT_FIELD_3_KEY);
            Assert.IsTrue(field != null);
            Assert.IsTrue(field.IsTermVectorStored() == false);
            Assert.IsTrue(field.IsStoreOffsetWithTermVector() == false);
            Assert.IsTrue(field.IsStorePositionWithTermVector() == false);
            Assert.IsTrue(field.GetOmitNorms() == true);
            Assert.IsTrue(field.GetOmitTf() == false);

            field = doc.GetField(DocHelper.NO_TF_KEY);
            Assert.IsTrue(field != null);
            Assert.IsTrue(field.IsTermVectorStored() == false);
            Assert.IsTrue(field.IsStoreOffsetWithTermVector() == false);
            Assert.IsTrue(field.IsStorePositionWithTermVector() == false);
            Assert.IsTrue(field.GetOmitNorms() == false);
            Assert.IsTrue(field.GetOmitTf() == true);
            reader.Close();
        }
Example #12
 public virtual void  Test()
 {
     Assert.IsTrue(dir != null);
     Assert.IsTrue(fieldInfos != null);
     try
     {
         FieldsReader reader = new FieldsReader(dir, "test", fieldInfos);
         Assert.IsTrue(reader != null);
         Assert.IsTrue(reader.Size() == 1);
         Document doc = reader.Doc(0);
         Assert.IsTrue(doc != null);
         Assert.IsTrue(doc.GetField("textField1") != null);
         Field field = doc.GetField("textField2");
         Assert.IsTrue(field != null);
         Assert.IsTrue(field.IsTermVectorStored() == true);
         reader.Close();
     }
     catch (System.IO.IOException e)
     {
         System.Console.Error.WriteLine(e.StackTrace);
         Assert.IsTrue(false);
     }
 }
        public virtual void  TestLazyFieldsAfterClose()
        {
            Assert.IsTrue(dir != null);
            Assert.IsTrue(fieldInfos != null);
            FieldsReader reader = new FieldsReader(dir, TEST_SEGMENT_NAME, fieldInfos);

            Assert.IsTrue(reader != null);
            Assert.IsTrue(reader.Size() == 1);
            System.Collections.Hashtable loadFieldNames = new System.Collections.Hashtable();
            SupportClass.CollectionsHelper.AddIfNotContains(loadFieldNames, DocHelper.TEXT_FIELD_1_KEY);
            SupportClass.CollectionsHelper.AddIfNotContains(loadFieldNames, DocHelper.TEXT_FIELD_UTF1_KEY);
            System.Collections.Hashtable lazyFieldNames = new System.Collections.Hashtable();
            SupportClass.CollectionsHelper.AddIfNotContains(lazyFieldNames, DocHelper.LARGE_LAZY_FIELD_KEY);
            SupportClass.CollectionsHelper.AddIfNotContains(lazyFieldNames, DocHelper.LAZY_FIELD_KEY);
            SupportClass.CollectionsHelper.AddIfNotContains(lazyFieldNames, DocHelper.LAZY_FIELD_BINARY_KEY);
            SupportClass.CollectionsHelper.AddIfNotContains(lazyFieldNames, DocHelper.TEXT_FIELD_UTF2_KEY);
            SupportClass.CollectionsHelper.AddIfNotContains(lazyFieldNames, DocHelper.COMPRESSED_TEXT_FIELD_2_KEY);
            SetBasedFieldSelector fieldSelector = new SetBasedFieldSelector(loadFieldNames, lazyFieldNames);
            Document doc = reader.Doc(0, fieldSelector);

            Assert.IsTrue(doc != null, "doc is null and it shouldn't be");
            Fieldable field = doc.GetFieldable(DocHelper.LAZY_FIELD_KEY);

            Assert.IsTrue(field != null, "field is null and it shouldn't be");
            Assert.IsTrue(field.IsLazy(), "field is not lazy and it should be");
            reader.Close();
            try
            {
                field.StringValue();
                Assert.Fail("did not hit AlreadyClosedException as expected");
            }
            catch (AlreadyClosedException e)
            {
                // expected
            }
        }
Example #14
			internal void  OpenDocStores(SegmentInfo si)
			{
				lock (this)
				{
					
					System.Diagnostics.Debug.Assert(si.name.Equals(segment));
					
					if (fieldsReaderOrig == null)
					{
						Directory storeDir;
						if (si.GetDocStoreOffset() != -1)
						{
							if (si.GetDocStoreIsCompoundFile())
							{
								System.Diagnostics.Debug.Assert(storeCFSReader == null);
								storeCFSReader = new CompoundFileReader(dir, si.GetDocStoreSegment() + "." + IndexFileNames.COMPOUND_FILE_STORE_EXTENSION, readBufferSize);
								storeDir = storeCFSReader;
								System.Diagnostics.Debug.Assert(storeDir != null);
							}
							else
							{
								storeDir = dir;
								System.Diagnostics.Debug.Assert(storeDir != null);
							}
						}
						else if (si.GetUseCompoundFile())
						{
							// In some cases, we were originally opened when CFS
							// was not used, but then we are asked to open doc
							// stores after the segment has switched to CFS
							if (cfsReader == null)
							{
								cfsReader = new CompoundFileReader(dir, segment + "." + IndexFileNames.COMPOUND_FILE_EXTENSION, readBufferSize);
							}
							storeDir = cfsReader;
							System.Diagnostics.Debug.Assert(storeDir != null);
						}
						else
						{
							storeDir = dir;
							System.Diagnostics.Debug.Assert(storeDir != null);
						}
						
						System.String storesSegment;
						if (si.GetDocStoreOffset() != -1)
						{
							storesSegment = si.GetDocStoreSegment();
						}
						else
						{
							storesSegment = segment;
						}
						
						fieldsReaderOrig = new FieldsReader(storeDir, storesSegment, fieldInfos, readBufferSize, si.GetDocStoreOffset(), si.docCount);
						
						// Verify two sources of "maxDoc" agree:
						if (si.GetDocStoreOffset() == -1 && fieldsReaderOrig.Size() != si.docCount)
						{
							throw new CorruptIndexException("doc counts differ for segment " + segment + ": fieldsReader shows " + fieldsReaderOrig.Size() + " but segmentInfo shows " + si.docCount);
						}
						
						if (fieldInfos.HasVectors())
						{
							// open term vector files only as needed
							termVectorsReaderOrig = new TermVectorsReader(storeDir, storesSegment, fieldInfos, readBufferSize, si.GetDocStoreOffset(), si.docCount);
						}
					}
				}
			}
Example #16
		public virtual void  Test()
		{
			Assert.IsTrue(dir != null);
			Assert.IsTrue(fieldInfos != null);
			FieldsReader reader = new FieldsReader(dir, TEST_SEGMENT_NAME, fieldInfos);
			Assert.IsTrue(reader != null);
			Assert.IsTrue(reader.Size() == 1);
			Document doc = reader.Doc(0, null);
			Assert.IsTrue(doc != null);
			Assert.IsTrue(doc.GetField(DocHelper.TEXT_FIELD_1_KEY) != null);
			
			IFieldable field = doc.GetField(DocHelper.TEXT_FIELD_2_KEY);
			Assert.IsTrue(field != null);
			Assert.IsTrue(field.IsTermVectorStored == true);
			
			Assert.IsTrue(field.IsStoreOffsetWithTermVector == true);
			Assert.IsTrue(field.IsStorePositionWithTermVector == true);
			Assert.IsTrue(field.OmitNorms == false);
			Assert.IsTrue(field.OmitTermFreqAndPositions == false);
			
			field = doc.GetField(DocHelper.TEXT_FIELD_3_KEY);
			Assert.IsTrue(field != null);
			Assert.IsTrue(field.IsTermVectorStored == false);
			Assert.IsTrue(field.IsStoreOffsetWithTermVector == false);
			Assert.IsTrue(field.IsStorePositionWithTermVector == false);
			Assert.IsTrue(field.OmitNorms == true);
			Assert.IsTrue(field.OmitTermFreqAndPositions == false);
			
			field = doc.GetField(DocHelper.NO_TF_KEY);
			Assert.IsTrue(field != null);
			Assert.IsTrue(field.IsTermVectorStored == false);
			Assert.IsTrue(field.IsStoreOffsetWithTermVector == false);
			Assert.IsTrue(field.IsStorePositionWithTermVector == false);
			Assert.IsTrue(field.OmitNorms == false);
			Assert.IsTrue(field.OmitTermFreqAndPositions == true);
			reader.Dispose();
		}
Example #17
 private void  InitBlock(FieldsReader enclosingInstance)
 {
     this.enclosingInstance = enclosingInstance;
 }
Example #18
		private int CopyFieldsWithDeletions(FieldSelector fieldSelectorMerge, FieldsWriter fieldsWriter, IndexReader reader, FieldsReader matchingFieldsReader)
		{
			int docCount = 0;
			int maxDoc = reader.MaxDoc();
			if (matchingFieldsReader != null)
			{
				// We can bulk-copy because the fieldInfos are "congruent"
				for (int j = 0; j < maxDoc; )
				{
					if (reader.IsDeleted(j))
					{
						// skip deleted docs
						++j;
						continue;
					}
					// We can optimize this case (doing a bulk byte copy) since the field 
					// numbers are identical
					int start = j, numDocs = 0;
					do 
					{
						j++;
						numDocs++;
						if (j >= maxDoc)
							break;
						if (reader.IsDeleted(j))
						{
							j++;
							break;
						}
					}
					while (numDocs < MAX_RAW_MERGE_DOCS);
					
					IndexInput stream = matchingFieldsReader.RawDocs(rawDocLengths, start, numDocs);
					fieldsWriter.AddRawDocuments(stream, rawDocLengths, numDocs);
					docCount += numDocs;
					checkAbort.Work(300 * numDocs);
				}
			}
			else
			{
				for (int j = 0; j < maxDoc; j++)
				{
					if (reader.IsDeleted(j))
					{
						// skip deleted docs
						continue;
					}
					// NOTE: it's very important to first assign to doc then pass it to
					// termVectorsWriter.addAllDocVectors; see LUCENE-1282
					Document doc = reader.Document(j, fieldSelectorMerge);
					fieldsWriter.AddDocument(doc);
					docCount++;
					checkAbort.Work(300);
				}
			}
			return docCount;
		}
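The bulk-copy branch above is the subtle part: it walks j through maximal runs of live (non-deleted) documents, caps each run at MAX_RAW_MERGE_DOCS, and hands each run to AddRawDocuments as raw bytes. The run-finding logic in isolation, as a sketch:

        using System;
        using System.Collections.Generic;

        static class LiveRunFinder
        {
            // Yields (start, length) runs of live docs, each at most maxChunk long,
            // given a deletion predicate - mirrors the do/while loop above.
            public static IEnumerable<(int Start, int Length)> LiveRuns(
                int maxDoc, int maxChunk, Func<int, bool> isDeleted)
            {
                int j = 0;
                while (j < maxDoc)
                {
                    if (isDeleted(j)) { j++; continue; } // skip deleted docs

                    int start = j, numDocs = 0;
                    do
                    {
                        j++;
                        numDocs++;
                        if (j >= maxDoc)
                            break;
                        if (isDeleted(j)) { j++; break; } // run ends at the next deletion
                    }
                    while (numDocs < maxChunk);

                    yield return (start, numDocs);
                }
            }
        }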
Example #19
			public LazyField(FieldsReader enclosingInstance, System.String name, Field.Store store, int toRead, long pointer, bool isBinary):base(name, store, Field.Index.NO, Field.TermVector.NO)
			{
				InitBlock(enclosingInstance);
				this.toRead = toRead;
				this.pointer = pointer;
				this.isBinary = isBinary;
				if (isBinary)
					binaryLength = toRead;
				lazy = true;
			}
        public virtual void  TestLazyPerformance()
        {
            System.String      tmpIODir = SupportClass.AppSettings.Get("tempDir", "");
            System.String      userName = System.Environment.UserName;
            System.String      path     = tmpIODir + System.IO.Path.DirectorySeparatorChar.ToString() + "lazyDir" + userName;
            System.IO.FileInfo file     = new System.IO.FileInfo(path);
            _TestUtil.RmDir(file);
            FSDirectory tmpDir = FSDirectory.Open(file);

            Assert.IsTrue(tmpDir != null);

            IndexWriter writer = new IndexWriter(tmpDir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);

            writer.SetUseCompoundFile(false);
            writer.AddDocument(testDoc);
            writer.Close();

            Assert.IsTrue(fieldInfos != null);
            FieldsReader reader;
            long         lazyTime    = 0;
            long         regularTime = 0;
            int          length      = 50;

            System.Collections.Hashtable lazyFieldNames = new System.Collections.Hashtable();
            SupportClass.CollectionsHelper.AddIfNotContains(lazyFieldNames, DocHelper.LARGE_LAZY_FIELD_KEY);
            SetBasedFieldSelector fieldSelector = new SetBasedFieldSelector(new System.Collections.Hashtable(), lazyFieldNames);

            for (int i = 0; i < length; i++)
            {
                reader = new FieldsReader(tmpDir, TEST_SEGMENT_NAME, fieldInfos);
                Assert.IsTrue(reader != null);
                Assert.IsTrue(reader.Size() == 1);

                Document doc;
                doc = reader.Doc(0, null);                 //Load all of them
                Assert.IsTrue(doc != null, "doc is null and it shouldn't be");
                Fieldable field = doc.GetFieldable(DocHelper.LARGE_LAZY_FIELD_KEY);
                Assert.IsTrue(field.IsLazy() == false, "field is lazy");
                System.String value_Renamed;
                long          start;
                long          finish;
                start = (DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond);
                //On my machine this was always 0ms.
                value_Renamed = field.StringValue();
                finish        = (DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond);
                Assert.IsTrue(value_Renamed != null, "value is null and it shouldn't be");
                Assert.IsTrue(field != null, "field is null and it shouldn't be");
                regularTime += (finish - start);
                reader.Close();
                reader = null;
                doc    = null;
                //Hmmm, are we still in cache???
                System.GC.Collect();
                reader = new FieldsReader(tmpDir, TEST_SEGMENT_NAME, fieldInfos);
                doc    = reader.Doc(0, fieldSelector);
                field  = doc.GetFieldable(DocHelper.LARGE_LAZY_FIELD_KEY);
                Assert.IsTrue(field.IsLazy() == true, "field is not lazy");
                start = (DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond);
                //On my machine this took around 50 - 70ms
                value_Renamed = field.StringValue();
                finish        = (DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond);
                Assert.IsTrue(value_Renamed != null, "value is null and it shouldn't be");
                lazyTime += (finish - start);
                reader.Close();
            }
            System.Console.Out.WriteLine("Average Non-lazy time (should be very close to zero): " + regularTime / length + " ms for " + length + " reads");
            System.Console.Out.WriteLine("Average Lazy Time (should be greater than zero): " + lazyTime / length + " ms for " + length + " reads");
        }
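A note on the timing above: DateTime.Now has roughly 10-15 ms resolution on Windows, which is exactly why the non-lazy measurement "was always 0ms". System.Diagnostics.Stopwatch is the accurate alternative; a sketch of the substitution:

        using System;

        // Stopwatch-based substitute for the DateTime.Now.Ticks arithmetic (sketch).
        static long TimeMillis(Action read)
        {
            var sw = System.Diagnostics.Stopwatch.StartNew();
            read();
            sw.Stop();
            return sw.ElapsedMilliseconds; // resolution well under 1 ms
        }

        // usage: regularTime += TimeMillis(() => field.StringValue());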
Example #21
        private void  Initialize(SegmentInfo si)
        {
            segment = si.name;
            this.si = si;

            bool success = false;

            try
            {
                // Use compound file directory for some files, if it exists
                Directory cfsDir = Directory();
                if (si.GetUseCompoundFile())
                {
                    cfsReader = new CompoundFileReader(Directory(), segment + ".cfs");
                    cfsDir    = cfsReader;
                }

                // Open the per-segment files (through the compound-file directory when one exists, otherwise the plain multi-file format)
                fieldInfos   = new FieldInfos(cfsDir, segment + ".fnm");
                fieldsReader = new FieldsReader(cfsDir, segment, fieldInfos);

                // Verify two sources of "maxDoc" agree:
                if (fieldsReader.Size() != si.docCount)
                {
                    throw new System.SystemException("doc counts differ for segment " + si.name + ": fieldsReader shows " + fieldsReader.Size() + " but segmentInfo shows " + si.docCount);
                }

                tis = new TermInfosReader(cfsDir, segment, fieldInfos);

                // NOTE: the bitvector is stored using the regular directory, not cfs
                if (HasDeletions(si))
                {
                    deletedDocs = new BitVector(Directory(), si.GetDelFileName());

                    // Verify # deletes does not exceed maxDoc for this segment:
                    if (deletedDocs.Count() > MaxDoc())
                    {
                        throw new System.SystemException("number of deletes (" + deletedDocs.Count() + ") exceeds max doc (" + MaxDoc() + ") for segment " + si.name);
                    }
                }

                // make sure that all index files have been read or are kept open
                // so that if an index update removes them we'll still have them
                freqStream = cfsDir.OpenInput(segment + ".frq");
                proxStream = cfsDir.OpenInput(segment + ".prx");
                OpenNorms(cfsDir);

                if (fieldInfos.HasVectors())
                {
                    // open term vector files only as needed
                    termVectorsReaderOrig = new TermVectorsReader(cfsDir, segment, fieldInfos);
                }
                success = true;
            }
            finally
            {
                // With lock-less commits, it's entirely possible (and
                // fine) to hit a FileNotFound exception above.  In
                // this case, we want to explicitly close any subset
                // of things that were opened so that we don't have to
                // wait for a GC to do so.
                if (!success)
                {
                    DoClose();
                }
            }
        }
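The success flag paired with the finally block is the exception-safe open pattern: lock-less commits make a FileNotFoundException during opening legitimate, so any subset of resources opened before the failure is closed immediately rather than leaking until a GC. Reduced to its skeleton (openAll and closeOpened are hypothetical stand-ins for the opens and DoClose above):

        using System;

        static class OpenHelper
        {
            // Open-or-clean-up skeleton used by Initialize (sketch).
            public static void OpenSafely(Action openAll, Action closeOpened)
            {
                bool success = false;
                try
                {
                    openAll();
                    success = true;
                }
                finally
                {
                    if (!success)
                        closeOpened(); // release whatever was opened before the failure
                }
            }
        }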
        public virtual void  TestLazyFields()
        {
            Assert.IsTrue(dir != null);
            Assert.IsTrue(fieldInfos != null);
            FieldsReader reader = new FieldsReader(dir, TEST_SEGMENT_NAME, fieldInfos);

            Assert.IsTrue(reader != null);
            Assert.IsTrue(reader.Size() == 1);
            System.Collections.Hashtable loadFieldNames = new System.Collections.Hashtable();
            SupportClass.CollectionsHelper.AddIfNotContains(loadFieldNames, DocHelper.TEXT_FIELD_1_KEY);
            SupportClass.CollectionsHelper.AddIfNotContains(loadFieldNames, DocHelper.TEXT_FIELD_UTF1_KEY);
            System.Collections.Hashtable lazyFieldNames = new System.Collections.Hashtable();
            SupportClass.CollectionsHelper.AddIfNotContains(lazyFieldNames, DocHelper.LARGE_LAZY_FIELD_KEY);
            SupportClass.CollectionsHelper.AddIfNotContains(lazyFieldNames, DocHelper.LAZY_FIELD_KEY);
            SupportClass.CollectionsHelper.AddIfNotContains(lazyFieldNames, DocHelper.LAZY_FIELD_BINARY_KEY);
            SupportClass.CollectionsHelper.AddIfNotContains(lazyFieldNames, DocHelper.TEXT_FIELD_UTF2_KEY);
            SupportClass.CollectionsHelper.AddIfNotContains(lazyFieldNames, DocHelper.COMPRESSED_TEXT_FIELD_2_KEY);
            SetBasedFieldSelector fieldSelector = new SetBasedFieldSelector(loadFieldNames, lazyFieldNames);
            Document doc = reader.Doc(0, fieldSelector);

            Assert.IsTrue(doc != null, "doc is null and it shouldn't be");
            Fieldable field = doc.GetFieldable(DocHelper.LAZY_FIELD_KEY);

            Assert.IsTrue(field != null, "field is null and it shouldn't be");
            Assert.IsTrue(field.IsLazy(), "field is not lazy and it should be");
            System.String value_Renamed = field.StringValue();
            Assert.IsTrue(value_Renamed != null, "value is null and it shouldn't be");
            Assert.IsTrue(value_Renamed.Equals(DocHelper.LAZY_FIELD_TEXT) == true, value_Renamed + " is not equal to " + DocHelper.LAZY_FIELD_TEXT);
            field = doc.GetFieldable(DocHelper.COMPRESSED_TEXT_FIELD_2_KEY);
            Assert.IsTrue(field != null, "field is null and it shouldn't be");
            Assert.IsTrue(field.IsLazy(), "field is not lazy and it should be");
            Assert.IsTrue(field.BinaryValue() == null, "binaryValue isn't null for lazy string field");
            value_Renamed = field.StringValue();
            Assert.IsTrue(value_Renamed != null, "value is null and it shouldn't be");
            Assert.IsTrue(value_Renamed.Equals(DocHelper.FIELD_2_COMPRESSED_TEXT) == true, value_Renamed + " is not equal to " + DocHelper.FIELD_2_COMPRESSED_TEXT);
            field = doc.GetFieldable(DocHelper.TEXT_FIELD_1_KEY);
            Assert.IsTrue(field != null, "field is null and it shouldn't be");
            Assert.IsTrue(field.IsLazy() == false, "Field is lazy and it should not be");
            field = doc.GetFieldable(DocHelper.TEXT_FIELD_UTF1_KEY);
            Assert.IsTrue(field != null, "field is null and it shouldn't be");
            Assert.IsTrue(field.IsLazy() == false, "Field is lazy and it should not be");
            Assert.IsTrue(field.StringValue().Equals(DocHelper.FIELD_UTF1_TEXT) == true, field.StringValue() + " is not equal to " + DocHelper.FIELD_UTF1_TEXT);

            field = doc.GetFieldable(DocHelper.TEXT_FIELD_UTF2_KEY);
            Assert.IsTrue(field != null, "field is null and it shouldn't be");
            Assert.IsTrue(field.IsLazy() == true, "field is not lazy and it should be");
            Assert.IsTrue(field.StringValue().Equals(DocHelper.FIELD_UTF2_TEXT) == true, field.StringValue() + " is not equal to " + DocHelper.FIELD_UTF2_TEXT);

            field = doc.GetFieldable(DocHelper.LAZY_FIELD_BINARY_KEY);
            Assert.IsTrue(field != null, "field is null and it shouldn't be");
            Assert.IsTrue(field.StringValue() == null, "stringValue isn't null for lazy binary field");

            byte[] bytes = field.BinaryValue();
            Assert.IsTrue(bytes != null, "bytes is null and it shouldn't be");
            Assert.IsTrue(DocHelper.LAZY_FIELD_BINARY_BYTES.Length == bytes.Length, "byte array lengths differ");
            for (int i = 0; i < bytes.Length; i++)
            {
                Assert.IsTrue(bytes[i] == DocHelper.LAZY_FIELD_BINARY_BYTES[i], "byte[" + i + "] is mismatched");
            }
        }
Example #25
        private void assertCompressedFields29(Directory dir, bool shouldStillBeCompressed)
        {
            int count = 0;
            // FieldSelectorResult.SIZE returns 2*number_of_chars for String fields:
            int TEXT_PLAIN_LENGTH   = TEXT_TO_COMPRESS.Length * 2;
            int BINARY_PLAIN_LENGTH = BINARY_TO_COMPRESS.Length;

            IndexReader reader = IndexReader.Open(dir, true);

            try
            {
                // look into sub readers and check if raw merge is on/off
                var readers = new System.Collections.Generic.List<IndexReader>();
                ReaderUtil.GatherSubReaders(readers, reader);
                foreach (IndexReader ir in readers)
                {
                    FieldsReader fr = ((SegmentReader)ir).GetFieldsReader();
                    Assert.IsTrue(shouldStillBeCompressed != fr.CanReadRawDocs(),
                                  "for a 2.9 index, FieldsReader.canReadRawDocs() must be false and other way round for a trunk index");
                }

                // test that decompression works correctly
                for (int i = 0; i < reader.MaxDoc; i++)
                {
                    if (!reader.IsDeleted(i))
                    {
                        Document d = reader.Document(i);
                        if (d.Get("content3") != null)
                        {
                            continue;
                        }
                        count++;
                        IFieldable compressed = d.GetFieldable("compressed");
                        if (int.Parse(d.Get("id")) % 2 == 0)
                        {
                            Assert.IsFalse(compressed.IsBinary);
                            Assert.AreEqual(TEXT_TO_COMPRESS, compressed.StringValue,
                                            "incorrectly decompressed string");
                        }
                        else
                        {
                            Assert.IsTrue(compressed.IsBinary);
                            Assert.IsTrue(BINARY_TO_COMPRESS.SequenceEqual(compressed.GetBinaryValue()),
                                          "incorrectly decompressed binary");
                        }
                    }
                }

                //check if field was decompressed after optimize
                for (int i = 0; i < reader.MaxDoc; i++)
                {
                    if (!reader.IsDeleted(i))
                    {
                        Document d = reader.Document(i, new AnonymousFieldSelector());
                        if (d.Get("content3") != null)
                        {
                            continue;
                        }
                        count++;
                        // read the 4-byte size prefix from the binary value; it was written in Java's
                        // big-endian order, while BitConverter assumes little-endian, hence the Reverse():
                        byte[] encodedSize    = d.GetFieldable("compressed").GetBinaryValue().Take(4).Reverse().ToArray();
                        int    actualSize     = BitConverter.ToInt32(encodedSize, 0);
                        int    compressedSize = int.Parse(d.Get("compressedSize"));
                        bool   binary         = int.Parse(d.Get("id")) % 2 > 0;
                        int    shouldSize     = shouldStillBeCompressed
                                             ? compressedSize
                                             : (binary ? BINARY_PLAIN_LENGTH : TEXT_PLAIN_LENGTH);
                        Assert.AreEqual(shouldSize, actualSize, "size incorrect");
                        if (!shouldStillBeCompressed)
                        {
                            Assert.IsFalse(compressedSize == actualSize,
                                           "uncompressed field should have another size than recorded in index");
                        }
                    }
                }
                Assert.AreEqual(34 * 2, count, "correct number of tests");
            }
            finally
            {
                reader.Dispose();
            }
        }
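The Take(4).Reverse() step above exists because the length prefix was written by Java's big-endian DataOutput, while BitConverter assumes the platform's (little-endian) byte order. The same decode without the LINQ round-trip, as a sketch:

        // Decode a big-endian 32-bit length prefix directly (sketch).
        static int ReadInt32BigEndian(byte[] buf, int offset)
        {
            return (buf[offset]     << 24)
                 | (buf[offset + 1] << 16)
                 | (buf[offset + 2] <<  8)
                 |  buf[offset + 3];
        }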
Example #26
		public virtual void  TestLazyFieldsAfterClose()
		{
			Assert.IsTrue(dir != null);
			Assert.IsTrue(fieldInfos != null);
			FieldsReader reader = new FieldsReader(dir, TEST_SEGMENT_NAME, fieldInfos);
			Assert.IsTrue(reader != null);
			Assert.IsTrue(reader.Size() == 1);
            ISet<string> loadFieldNames = Support.Compatibility.SetFactory.GetSet<string>();
            loadFieldNames.Add(DocHelper.TEXT_FIELD_1_KEY);
            loadFieldNames.Add(DocHelper.TEXT_FIELD_UTF1_KEY);
            ISet<string> lazyFieldNames = Support.Compatibility.SetFactory.GetSet<string>();
            lazyFieldNames.Add(DocHelper.LARGE_LAZY_FIELD_KEY);
            lazyFieldNames.Add(DocHelper.LAZY_FIELD_KEY);
            lazyFieldNames.Add(DocHelper.LAZY_FIELD_BINARY_KEY);
            lazyFieldNames.Add(DocHelper.TEXT_FIELD_UTF2_KEY);
			SetBasedFieldSelector fieldSelector = new SetBasedFieldSelector(loadFieldNames, lazyFieldNames);
			Document doc = reader.Doc(0, fieldSelector);
			Assert.IsTrue(doc != null, "doc is null and it shouldn't be");
			IFieldable field = doc.GetFieldable(DocHelper.LAZY_FIELD_KEY);
			Assert.IsTrue(field != null, "field is null and it shouldn't be");
			Assert.IsTrue(field.IsLazy, "field is not lazy and it should be");
            reader.Dispose();

		    Assert.Throws<AlreadyClosedException>(() => { var value = field.StringValue; },
		                                          "did not hit AlreadyClosedException as expected");
		}
Example #27
		public virtual void  TestLazyPerformance()
		{
			System.String tmpIODir = AppSettings.Get("tempDir", "");
			System.String userName = System.Environment.UserName;
			System.String path = tmpIODir + System.IO.Path.DirectorySeparatorChar.ToString() + "lazyDir" + userName;
            System.IO.DirectoryInfo file = new System.IO.DirectoryInfo(path);
			_TestUtil.RmDir(file);
			FSDirectory tmpDir = FSDirectory.Open(file);
			Assert.IsTrue(tmpDir != null);
			
			IndexWriter writer = new IndexWriter(tmpDir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
			writer.UseCompoundFile = false;
			writer.AddDocument(testDoc);
			writer.Close();
			
			Assert.IsTrue(fieldInfos != null);
			FieldsReader reader;
			long lazyTime = 0;
			long regularTime = 0;
			int length = 50;
            ISet<string> lazyFieldNames = Support.Compatibility.SetFactory.GetSet<string>();
			lazyFieldNames.Add(DocHelper.LARGE_LAZY_FIELD_KEY);
			SetBasedFieldSelector fieldSelector = new SetBasedFieldSelector(Support.Compatibility.SetFactory.GetSet<string>(), lazyFieldNames);
			
			for (int i = 0; i < length; i++)
			{
				reader = new FieldsReader(tmpDir, TEST_SEGMENT_NAME, fieldInfos);
				Assert.IsTrue(reader != null);
				Assert.IsTrue(reader.Size() == 1);
				
				Document doc;
				doc = reader.Doc(0, null); //Load all of them
				Assert.IsTrue(doc != null, "doc is null and it shouldn't be");
				IFieldable field = doc.GetFieldable(DocHelper.LARGE_LAZY_FIELD_KEY);
				Assert.IsTrue(field.IsLazy == false, "field is lazy");
				System.String value_Renamed;
				long start;
				long finish;
				start = (DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond);
				//On my machine this was always 0ms.
				value_Renamed = field.StringValue;
				finish = (DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond);
				Assert.IsTrue(value_Renamed != null, "value is null and it shouldn't be");
				Assert.IsTrue(field != null, "field is null and it shouldn't be");
				regularTime += (finish - start);
                reader.Dispose();
				reader = null;
				doc = null;
				//Hmmm, are we still in cache???
				System.GC.Collect();
				reader = new FieldsReader(tmpDir, TEST_SEGMENT_NAME, fieldInfos);
				doc = reader.Doc(0, fieldSelector);
				field = doc.GetFieldable(DocHelper.LARGE_LAZY_FIELD_KEY);
				Assert.IsTrue(field.IsLazy == true, "field is not lazy");
				start = (DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond);
				//On my machine this took around 50 - 70ms
				value_Renamed = field.StringValue;
				finish = (DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond);
				Assert.IsTrue(value_Renamed != null, "value is null and it shouldn't be");
				lazyTime += (finish - start);
                reader.Dispose();
			}
			System.Console.Out.WriteLine("Average Non-lazy time (should be very close to zero): " + regularTime / length + " ms for " + length + " reads");
			System.Console.Out.WriteLine("Average Lazy Time (should be greater than zero): " + lazyTime / length + " ms for " + length + " reads");
		}
		public virtual void  Test()
		{
			Assert.IsTrue(dir != null);
			Assert.IsTrue(fieldInfos != null);
			FieldsReader reader = new FieldsReader(dir, segmentName, fieldInfos);
			Assert.IsTrue(reader != null);
			Assert.IsTrue(reader.Size() == 1);
			Lucene.Net.Documents.Document doc = reader.Doc(0, null);
			Assert.IsTrue(doc != null);
			Assert.IsTrue(doc.GetField(DocHelper.TEXT_FIELD_1_KEY) != null);
			
			Field field = doc.GetField(DocHelper.TEXT_FIELD_2_KEY);
			Assert.IsTrue(field != null);
			Assert.IsTrue(field.IsTermVectorStored() == true);
			
			Assert.IsTrue(field.IsStoreOffsetWithTermVector() == true);
			Assert.IsTrue(field.IsStorePositionWithTermVector() == true);
			Assert.IsTrue(field.GetOmitNorms() == false);
			
			field = doc.GetField(DocHelper.TEXT_FIELD_3_KEY);
			Assert.IsTrue(field != null);
			Assert.IsTrue(field.IsTermVectorStored() == false);
			Assert.IsTrue(field.IsStoreOffsetWithTermVector() == false);
			Assert.IsTrue(field.IsStorePositionWithTermVector() == false);
			Assert.IsTrue(field.GetOmitNorms() == true);
			
			
			reader.Close();
		}
Example #29
 public LazyField(FieldsReader enclosingInstance, System.String name, Field.Store store, Field.Index index, Field.TermVector termVector, int toRead, long pointer, bool isBinary, bool isCompressed)
     : base(name, store, index, termVector)
 {
     InitBlock(enclosingInstance);
     this.toRead = toRead;
     this.pointer = pointer;
     this.internalIsBinary = isBinary;
     if (isBinary)
         internalBinaryLength = toRead;
     lazy = true;
     this.isCompressed = isCompressed;
 }
Example #30
		private int CopyFieldsNoDeletions(FieldSelector fieldSelectorMerge, FieldsWriter fieldsWriter, IndexReader reader, FieldsReader matchingFieldsReader)
		{
			int maxDoc = reader.MaxDoc();
			int docCount = 0;
			if (matchingFieldsReader != null)
			{
				// We can bulk-copy because the fieldInfos are "congruent"
				while (docCount < maxDoc)
				{
					int len = System.Math.Min(MAX_RAW_MERGE_DOCS, maxDoc - docCount);
					IndexInput stream = matchingFieldsReader.RawDocs(rawDocLengths, docCount, len);
					fieldsWriter.AddRawDocuments(stream, rawDocLengths, len);
					docCount += len;
					checkAbort.Work(300 * len);
				}
			}
			else
			{
				for (; docCount < maxDoc; docCount++)
				{
					// NOTE: it's very important to first assign to doc then pass it to
					// termVectorsWriter.addAllDocVectors; see LUCENE-1282
					Document doc = reader.Document(docCount, fieldSelectorMerge);
					fieldsWriter.AddDocument(doc);
					checkAbort.Work(300);
				}
			}
			return docCount;
		}
Example #31
        /// <summary>Merges the stored fields of all readers into the new segment.</summary>
        /// <returns> The number of documents in all of the readers
        /// </returns>
        /// <throws>  CorruptIndexException if the index is corrupt </throws>
        /// <throws>  IOException if there is a low-level IO error </throws>
        private int MergeFields()
        {
            if (!mergeDocStores)
            {
                // When we are not merging by doc stores, that means
                // all segments were written as part of a single
                // autoCommit=false IndexWriter session, so their field
                // name -> number mapping are the same.  So, we start
                // with the fieldInfos of the last segment in this
                // case, to keep that numbering.
                SegmentReader sr = (SegmentReader)readers[readers.Count - 1];
                fieldInfos = (FieldInfos)sr.fieldInfos.Clone();
            }
            else
            {
                fieldInfos = new FieldInfos();                 // merge field names
            }

            for (int i = 0; i < readers.Count; i++)
            {
                IndexReader reader = (IndexReader)readers[i];
                if (reader is SegmentReader)
                {
                    SegmentReader segmentReader = (SegmentReader)reader;
                    for (int j = 0; j < segmentReader.GetFieldInfos().Size(); j++)
                    {
                        FieldInfo fi = segmentReader.GetFieldInfos().FieldInfo(j);
                        fieldInfos.Add(fi.name, fi.isIndexed, fi.storeTermVector, fi.storePositionWithTermVector, fi.storeOffsetWithTermVector, !reader.HasNorms(fi.name), fi.storePayloads, fi.omitTf);
                    }
                }
                else
                {
                    AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET), true, true, true, false, false);
                    AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION), true, true, false, false, false);
                    AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET), true, false, true, false, false);
                    AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR), true, false, false, false, false);
                    AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.OMIT_TF), false, false, false, false, true);
                    AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.STORES_PAYLOADS), false, false, false, true, false);
                    AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.INDEXED), false, false, false, false, false);
                    fieldInfos.Add(reader.GetFieldNames(IndexReader.FieldOption.UNINDEXED), false);
                }
            }
            fieldInfos.Write(directory, segment + ".fnm");

            int docCount = 0;

            SetMatchingSegmentReaders();

            if (mergeDocStores)
            {
                // for merging we don't want to compress/uncompress the data, so to tell the FieldsReader that we're
                // in  merge mode, we use this FieldSelector
                FieldSelector fieldSelectorMerge = new AnonymousClassFieldSelector(this);

                // merge field values
                FieldsWriter fieldsWriter = new FieldsWriter(directory, segment, fieldInfos);

                try
                {
                    for (int i = 0; i < readers.Count; i++)
                    {
                        IndexReader   reader = (IndexReader)readers[i];
                        SegmentReader matchingSegmentReader = matchingSegmentReaders[i];
                        FieldsReader  matchingFieldsReader;
                        bool          hasMatchingReader;
                        if (matchingSegmentReader != null)
                        {
                            FieldsReader fieldsReader = matchingSegmentReader.GetFieldsReader();
                            if (fieldsReader != null && !fieldsReader.CanReadRawDocs())
                            {
                                matchingFieldsReader = null;
                                hasMatchingReader    = false;
                            }
                            else
                            {
                                matchingFieldsReader = fieldsReader;
                                hasMatchingReader    = true;
                            }
                        }
                        else
                        {
                            hasMatchingReader    = false;
                            matchingFieldsReader = null;
                        }
                        int  maxDoc       = reader.MaxDoc();
                        bool hasDeletions = reader.HasDeletions();
                        for (int j = 0; j < maxDoc;)
                        {
                            if (!hasDeletions || !reader.IsDeleted(j))
                            { // skip deleted docs
                                if (hasMatchingReader)
                                {
                                    // We can optimize this case (doing a bulk
                                    // byte copy) since the field numbers are
                                    // identical
                                    int start   = j;
                                    int numDocs = 0;
                                    do
                                    {
                                        j++;
                                        numDocs++;
                                        if (j >= maxDoc)
                                        {
                                            break;
                                        }
                                        if (hasDeletions && matchingSegmentReader.IsDeleted(j))
                                        {
                                            j++;
                                            break;
                                        }
                                    } while (numDocs < MAX_RAW_MERGE_DOCS);

                                    IndexInput stream = matchingFieldsReader.RawDocs(rawDocLengths, start, numDocs);
                                    fieldsWriter.AddRawDocuments(stream, rawDocLengths, numDocs);
                                    docCount += numDocs;
                                    if (checkAbort != null)
                                    {
                                        checkAbort.Work(300 * numDocs);
                                    }
                                }
                                else
                                {
                                    // NOTE: it's very important to first assign
                                    // to doc then pass it to
                                    // termVectorsWriter.addAllDocVectors; see
                                    // LUCENE-1282
                                    Document doc = reader.Document(j, fieldSelectorMerge);
                                    fieldsWriter.AddDocument(doc);
                                    j++;
                                    docCount++;
                                    if (checkAbort != null)
                                    {
                                        checkAbort.Work(300);
                                    }
                                }
                            }
                            else
                            {
                                j++;
                            }
                        }
                    }
                }
                finally
                {
                    fieldsWriter.Close();
                }

                long fdxFileLength = directory.FileLength(segment + "." + IndexFileNames.FIELDS_INDEX_EXTENSION);

                // {{dougsale-2.4.0}
                // this shouldn't be a problem for us - if it is,
                // then it's not a JRE bug...
                //if (4+docCount*8 != fdxFileLength)
                //  // This is most likely a bug in Sun JRE 1.6.0_04/_05;
                //  // we detect that the bug has struck, here, and
                //  // throw an exception to prevent the corruption from
                //  // entering the index.  See LUCENE-1282 for
                //  // details.
                //  throw new RuntimeException("mergeFields produced an invalid result: docCount is " + docCount + " but fdx file size is " + fdxFileLength + "; now aborting this merge to prevent index corruption");
            }
            else
            {
                // If we are skipping the doc stores, that means there
                // are no deletions in any of these segments, so we
                // just sum numDocs() of each segment to get total docCount
                for (int i = 0; i < readers.Count; i++)
                {
                    docCount += ((IndexReader)readers[i]).NumDocs();
                }
            }

            return(docCount);
        }
		public virtual void  TestLoadSize()
		{
			FieldsReader reader = new FieldsReader(dir, segmentName, fieldInfos);
			Lucene.Net.Documents.Document doc;
			
			doc = reader.Doc(0, new AnonymousClassFieldSelector(this));
			Fieldable f1 = doc.GetFieldable(DocHelper.TEXT_FIELD_1_KEY);
			Fieldable f3 = doc.GetFieldable(DocHelper.TEXT_FIELD_3_KEY);
			Fieldable fb = doc.GetFieldable(DocHelper.LAZY_FIELD_BINARY_KEY);
			Assert.IsTrue(f1.IsBinary());
			Assert.IsTrue(!f3.IsBinary());
			Assert.IsTrue(fb.IsBinary());
			AssertSizeEquals(2 * DocHelper.FIELD_1_TEXT.Length, f1.BinaryValue());
			Assert.AreEqual(DocHelper.FIELD_3_TEXT, f3.StringValue());
			AssertSizeEquals(DocHelper.LAZY_FIELD_BINARY_BYTES.Length, fb.BinaryValue());
			
			reader.Close();
		}
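A note on the bulk-copy branch in the merge loop above: when a matching FieldsReader is available, the merge scans for stretches of consecutive live (non-deleted) documents and copies each stretch as raw bytes in a single call. The following self-contained sketch reproduces just that run detection over a deletion bitmap; LiveRunScanner and its names are made up for illustration and are not Lucene.Net API.

        using System.Collections.Generic;

        // Hypothetical helper: emit (start, count) stretches of consecutive
        // live documents, capped at maxChunk docs, so each stretch can be
        // bulk-copied via RawDocs/AddRawDocuments in one shot.
        static class LiveRunScanner
        {
            public static IEnumerable<(int Start, int Count)> LiveRuns(bool[] deleted, int maxChunk)
            {
                int maxDoc = deleted.Length;
                for (int j = 0; j < maxDoc; )
                {
                    if (deleted[j]) { j++; continue; }     // skip deleted docs
                    int start = j, numDocs = 0;
                    do
                    {
                        j++; numDocs++;
                        if (j >= maxDoc) break;
                        if (deleted[j]) { j++; break; }    // run ends; hop past the deleted doc
                    } while (numDocs < maxChunk);
                    yield return (start, numDocs);
                }
            }
        }

For example, with deleted = {false, false, true, false} and maxChunk = 2 the sketch yields (0, 2) and then (3, 1), matching how the loop above batches its calls to AddRawDocuments.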
Example #33
0
        private int CopyFieldsWithDeletions(FieldsWriter fieldsWriter, IndexReader reader, FieldsReader matchingFieldsReader)
        {
            int docCount = 0;
            int maxDoc   = reader.MaxDoc;

            if (matchingFieldsReader != null)
            {
                // We can bulk-copy because the fieldInfos are "congruent"
                for (int j = 0; j < maxDoc;)
                {
                    if (reader.IsDeleted(j))
                    {
                        // skip deleted docs
                        ++j;
                        continue;
                    }
                    // We can optimize this case (doing a bulk byte copy) since the field
                    // numbers are identical
                    int start = j, numDocs = 0;
                    do
                    {
                        j++;
                        numDocs++;
                        if (j >= maxDoc)
                        {
                            break;
                        }
                        if (reader.IsDeleted(j))
                        {
                            j++;
                            break;
                        }
                    } while (numDocs < MAX_RAW_MERGE_DOCS);

                    IndexInput stream = matchingFieldsReader.RawDocs(rawDocLengths, start, numDocs);
                    fieldsWriter.AddRawDocuments(stream, rawDocLengths, numDocs);
                    docCount += numDocs;
                    checkAbort.Work(300 * numDocs);
                }
            }
            else
            {
                for (int j = 0; j < maxDoc; j++)
                {
                    if (reader.IsDeleted(j))
                    {
                        // skip deleted docs
                        continue;
                    }
                    // NOTE: it's very important to first assign to doc then pass it to
                    // termVectorsWriter.addAllDocVectors; see LUCENE-1282
                    Document doc = reader.Document(j);
                    fieldsWriter.AddDocument(doc);
                    docCount++;
                    checkAbort.Work(300);
                }
            }
            return(docCount);
        }
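The checkAbort.Work(300 * numDocs) calls above charge the merge for the documents it just copied so that an abort request can take effect promptly. A hedged stand-in for that accounting follows; AbortCheckerSketch is a made-up class, and the real CheckAbort in SegmentMerger differs in detail.

        using System;

        class MergeAbortedException : Exception { }

        class AbortCheckerSketch
        {
            private double workCount;       // work units accumulated so far
            private volatile bool aborted;  // set by whoever requests the abort

            public void SetAborted() => aborted = true;

            // Called with units roughly proportional to the data just copied;
            // throwing here unwinds the merge before it does more work.
            public void Work(double units)
            {
                workCount += units;
                if (aborted)
                    throw new MergeAbortedException();
            }
        }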
Example #34
0
		private void  Initialize(SegmentInfo si)
		{
			segment = si.name;
			this.si = si;
			
			bool success = false;
			
			try
			{
				// Use compound file directory for some files, if it exists
				Directory cfsDir = Directory();
				if (si.GetUseCompoundFile())
				{
					cfsReader = new CompoundFileReader(Directory(), segment + ".cfs");
					cfsDir = cfsReader;
				}
				
				// No compound file exists - use the multi-file format
				fieldInfos = new FieldInfos(cfsDir, segment + ".fnm");
				fieldsReader = new FieldsReader(cfsDir, segment, fieldInfos);
				
				// Verify two sources of "maxDoc" agree:
				if (fieldsReader.Size() != si.docCount)
				{
					throw new System.SystemException("doc counts differ for segment " + si.name + ": fieldsReader shows " + fieldsReader.Size() + " but segmentInfo shows " + si.docCount);
				}
				
				tis = new TermInfosReader(cfsDir, segment, fieldInfos);
				
				// NOTE: the bitvector is stored using the regular directory, not cfs
				if (HasDeletions(si))
				{
					deletedDocs = new BitVector(Directory(), si.GetDelFileName());
					
					// Verify # deletes does not exceed maxDoc for this segment:
					if (deletedDocs.Count() > MaxDoc())
					{
						throw new System.SystemException("number of deletes (" + deletedDocs.Count() + ") exceeds max doc (" + MaxDoc() + ") for segment " + si.name);
					}
				}
				
				// make sure that all index files have been read or are kept open
				// so that if an index update removes them we'll still have them
				freqStream = cfsDir.OpenInput(segment + ".frq");
				proxStream = cfsDir.OpenInput(segment + ".prx");
				OpenNorms(cfsDir);
				
				if (fieldInfos.HasVectors())
				{
					// open term vector files only as needed
					termVectorsReaderOrig = new TermVectorsReader(cfsDir, segment, fieldInfos);
				}
				success = true;
			}
			finally
			{
				
				// With lock-less commits, it's entirely possible (and
				// fine) to hit a FileNotFound exception above.  In
				// this case, we want to explicitly close any subset
				// of things that were opened so that we don't have to
				// wait for a GC to do so.
				if (!success)
				{
					DoClose();
				}
			}
		}
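The success/finally shape of Initialize above deserves a note: resources are opened one after another, and if any open throws (for example a FileNotFound exception under lock-less commits), everything opened so far is closed explicitly instead of lingering until a GC. A minimal sketch of the pattern, using hypothetical resources rather than the SegmentReader fields:

        using System;

        class PartialOpenSketch
        {
            private IDisposable first, second;

            public void Open(Func<IDisposable> openFirst, Func<IDisposable> openSecond)
            {
                bool success = false;
                try
                {
                    first = openFirst();
                    second = openSecond();   // may throw partway through
                    success = true;
                }
                finally
                {
                    if (!success)
                        Close();             // release only the subset we managed to open
                }
            }

            public void Close()
            {
                first?.Dispose();
                second?.Dispose();
            }
        }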
Example #35
0
        private int CopyFieldsNoDeletions(FieldsWriter fieldsWriter, IndexReader reader, FieldsReader matchingFieldsReader)
        {
            int maxDoc   = reader.MaxDoc;
            int docCount = 0;

            if (matchingFieldsReader != null)
            {
                // We can bulk-copy because the fieldInfos are "congruent"
                while (docCount < maxDoc)
                {
                    int        len    = System.Math.Min(MAX_RAW_MERGE_DOCS, maxDoc - docCount);
                    IndexInput stream = matchingFieldsReader.RawDocs(rawDocLengths, docCount, len);
                    fieldsWriter.AddRawDocuments(stream, rawDocLengths, len);
                    docCount += len;
                    checkAbort.Work(300 * len);
                }
            }
            else
            {
                for (; docCount < maxDoc; docCount++)
                {
                    // NOTE: it's very important to first assign to doc then pass it to
                    // termVectorsWriter.addAllDocVectors; see LUCENE-1282
                    Document doc = reader.Document(docCount);
                    fieldsWriter.AddDocument(doc);
                    checkAbort.Work(300);
                }
            }
            return(docCount);
        }
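With no deletions there are no live runs to detect, so CopyFieldsNoDeletions above advances in fixed-size chunks of at most MAX_RAW_MERGE_DOCS documents. The chunking arithmetic in isolation (ChunkPlannerSketch is illustrative, and the 4192 cap used in the example below is assumed, not quoted from the source):

        using System;
        using System.Collections.Generic;

        static class ChunkPlannerSketch
        {
            // Yields the (start, length) chunks the bulk copy would issue.
            public static IEnumerable<(int Start, int Length)> Chunks(int maxDoc, int maxChunk)
            {
                for (int docCount = 0; docCount < maxDoc; )
                {
                    int len = Math.Min(maxChunk, maxDoc - docCount);
                    yield return (docCount, len);
                    docCount += len;
                }
            }
        }

For maxDoc = 10240 and a 4192-doc cap this produces chunks of 4192, 4192 and 1856 documents, i.e. three RawDocs/AddRawDocuments round trips instead of 10240 per-document reads.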
Example #36
0
		public virtual void  TestLazyFields()
		{
			Assert.IsTrue(dir != null);
			Assert.IsTrue(fieldInfos != null);
			FieldsReader reader = new FieldsReader(dir, TEST_SEGMENT_NAME, fieldInfos);
			Assert.IsTrue(reader != null);
			Assert.IsTrue(reader.Size() == 1);
            ISet<string> loadFieldNames = Support.Compatibility.SetFactory.GetSet<string>();
			loadFieldNames.Add(DocHelper.TEXT_FIELD_1_KEY);
			loadFieldNames.Add(DocHelper.TEXT_FIELD_UTF1_KEY);
            ISet<string> lazyFieldNames = Support.Compatibility.SetFactory.GetSet<string>();
			//new String[]{DocHelper.LARGE_LAZY_FIELD_KEY, DocHelper.LAZY_FIELD_KEY, DocHelper.LAZY_FIELD_BINARY_KEY};
			lazyFieldNames.Add(DocHelper.LARGE_LAZY_FIELD_KEY);
			lazyFieldNames.Add(DocHelper.LAZY_FIELD_KEY);
			lazyFieldNames.Add(DocHelper.LAZY_FIELD_BINARY_KEY);
			lazyFieldNames.Add(DocHelper.TEXT_FIELD_UTF2_KEY);
			SetBasedFieldSelector fieldSelector = new SetBasedFieldSelector(loadFieldNames, lazyFieldNames);
			Document doc = reader.Doc(0, fieldSelector);
			Assert.IsTrue(doc != null, "doc is null and it shouldn't be");
			IFieldable field = doc.GetFieldable(DocHelper.LAZY_FIELD_KEY);
			Assert.IsTrue(field != null, "field is null and it shouldn't be");
			Assert.IsTrue(field.IsLazy, "field is not lazy and it should be");
			System.String value = field.StringValue;
			Assert.IsTrue(value != null, "value is null and it shouldn't be");
			Assert.IsTrue(value.Equals(DocHelper.LAZY_FIELD_TEXT), value + " is not equal to " + DocHelper.LAZY_FIELD_TEXT);
			field = doc.GetFieldable(DocHelper.TEXT_FIELD_1_KEY);
			Assert.IsTrue(field != null, "field is null and it shouldn't be");
			Assert.IsTrue(field.IsLazy == false, "Field is lazy and it should not be");
			field = doc.GetFieldable(DocHelper.TEXT_FIELD_UTF1_KEY);
			Assert.IsTrue(field != null, "field is null and it shouldn't be");
			Assert.IsTrue(field.IsLazy == false, "Field is lazy and it should not be");
			Assert.IsTrue(field.StringValue.Equals(DocHelper.FIELD_UTF1_TEXT), field.StringValue + " is not equal to " + DocHelper.FIELD_UTF1_TEXT);
			
			field = doc.GetFieldable(DocHelper.TEXT_FIELD_UTF2_KEY);
			Assert.IsTrue(field != null, "field is null and it shouldn't be");
			Assert.IsTrue(field.IsLazy, "field is not lazy and it should be");
			Assert.IsTrue(field.StringValue.Equals(DocHelper.FIELD_UTF2_TEXT), field.StringValue + " is not equal to " + DocHelper.FIELD_UTF2_TEXT);
			
			field = doc.GetFieldable(DocHelper.LAZY_FIELD_BINARY_KEY);
			Assert.IsTrue(field != null, "field is null and it shouldn't be");
			Assert.IsTrue(field.StringValue == null, "stringValue isn't null for lazy binary field");
			
			byte[] bytes = field.GetBinaryValue();
			Assert.IsTrue(bytes != null, "bytes is null and it shouldn't be");
			Assert.IsTrue(DocHelper.LAZY_FIELD_BINARY_BYTES.Length == bytes.Length, "bytes.Length is not equal to LAZY_FIELD_BINARY_BYTES.Length");
			for (int i = 0; i < bytes.Length; i++)
			{
				Assert.IsTrue(bytes[i] == DocHelper.LAZY_FIELD_BINARY_BYTES[i], "byte[" + i + "] is mismatched");
			}
		}
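TestLazyFields above drives a SetBasedFieldSelector built from two sets. Its decision rule, restated as a stand-in (hypothetical types, not the Lucene.Net classes): names in the load set are read eagerly, names in the lazy set get a placeholder that defers reading until first access, and every other field is skipped outright.

        using System.Collections.Generic;

        enum FieldSelectorResultSketch { Load, LazyLoad, NoLoad }

        class SetBasedSelectorSketch
        {
            private readonly ISet<string> eager;
            private readonly ISet<string> lazy;

            public SetBasedSelectorSketch(ISet<string> eager, ISet<string> lazy)
            {
                this.eager = eager;
                this.lazy = lazy;
            }

            public FieldSelectorResultSketch Accept(string fieldName)
            {
                if (eager.Contains(fieldName)) return FieldSelectorResultSketch.Load;
                if (lazy.Contains(fieldName)) return FieldSelectorResultSketch.LazyLoad;
                return FieldSelectorResultSketch.NoLoad;   // field bytes never read
            }
        }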
Example #37
0
		public virtual void  TestLazyFieldsAfterClose()
		{
			Assert.IsTrue(dir != null);
			Assert.IsTrue(fieldInfos != null);
			FieldsReader reader = new FieldsReader(dir, TEST_SEGMENT_NAME, fieldInfos);
			Assert.IsTrue(reader != null);
			Assert.IsTrue(reader.Size() == 1);
			System.Collections.Hashtable loadFieldNames = new System.Collections.Hashtable();
			SupportClass.CollectionsHelper.AddIfNotContains(loadFieldNames, DocHelper.TEXT_FIELD_1_KEY);
			SupportClass.CollectionsHelper.AddIfNotContains(loadFieldNames, DocHelper.TEXT_FIELD_UTF1_KEY);
			System.Collections.Hashtable lazyFieldNames = new System.Collections.Hashtable();
			SupportClass.CollectionsHelper.AddIfNotContains(lazyFieldNames, DocHelper.LARGE_LAZY_FIELD_KEY);
			SupportClass.CollectionsHelper.AddIfNotContains(lazyFieldNames, DocHelper.LAZY_FIELD_KEY);
			SupportClass.CollectionsHelper.AddIfNotContains(lazyFieldNames, DocHelper.LAZY_FIELD_BINARY_KEY);
			SupportClass.CollectionsHelper.AddIfNotContains(lazyFieldNames, DocHelper.TEXT_FIELD_UTF2_KEY);
			SupportClass.CollectionsHelper.AddIfNotContains(lazyFieldNames, DocHelper.COMPRESSED_TEXT_FIELD_2_KEY);
			SetBasedFieldSelector fieldSelector = new SetBasedFieldSelector(loadFieldNames, lazyFieldNames);
			Document doc = reader.Doc(0, fieldSelector);
			Assert.IsTrue(doc != null, "doc is null and it shouldn't be");
			Fieldable field = doc.GetFieldable(DocHelper.LAZY_FIELD_KEY);
			Assert.IsTrue(field != null, "field is null and it shouldn't be");
			Assert.IsTrue(field.IsLazy(), "field is not lazy and it should be");
			reader.Close();
			try
			{
				field.StringValue();
				Assert.Fail("did not hit AlreadyClosedException as expected");
			}
			catch (AlreadyClosedException)
			{
				// expected
			}
		}
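The AlreadyClosedException the test above expects falls out of how lazy fields work: the field object holds a reference back to its FieldsReader and only pulls the value on first access, so once the reader is closed there is nothing left to read from. A minimal stand-in with hypothetical types:

        using System;

        class AlreadyClosedSketchException : Exception { }

        class LazyFieldSketch
        {
            private readonly Func<bool> readerIsClosed;  // probes the owning reader
            private readonly Func<string> readValue;     // actually hits the stored-fields file

            public LazyFieldSketch(Func<bool> readerIsClosed, Func<string> readValue)
            {
                this.readerIsClosed = readerIsClosed;
                this.readValue = readValue;
            }

            public string StringValue()
            {
                if (readerIsClosed())
                    throw new AlreadyClosedSketchException();  // reader gone, bytes unreachable
                return readValue();
            }
        }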
Example #38
0
		public virtual void  TestLoadFirst()
		{
			Assert.IsTrue(dir != null);
			Assert.IsTrue(fieldInfos != null);
			FieldsReader reader = new FieldsReader(dir, TEST_SEGMENT_NAME, fieldInfos);
			Assert.IsTrue(reader != null);
			Assert.IsTrue(reader.Size() == 1);
			LoadFirstFieldSelector fieldSelector = new LoadFirstFieldSelector();
			Document doc = reader.Doc(0, fieldSelector);
			Assert.IsTrue(doc != null, "doc is null and it shouldn't be");
			int count = 0;
			var l = doc.GetFields();
			for (System.Collections.IEnumerator iter = l.GetEnumerator(); iter.MoveNext(); )
			{
				Field field = (Field) iter.Current;
				Assert.IsTrue(field != null, "field is null and it shouldn't be");
				System.String sv = field.StringValue;
				Assert.IsTrue(sv != null, "sv is null and it shouldn't be");
				count++;
			}
			Assert.IsTrue(count == 1, count + " does not equal: " + 1);
		}
        private void  Initialize(SegmentInfo si, int readBufferSize, bool doOpenStores)
        {
            segment             = si.name;
            this.si             = si;
            this.readBufferSize = readBufferSize;

            bool success = false;

            try
            {
                // Use compound file directory for some files, if it exists
                Directory cfsDir = Directory();
                if (si.GetUseCompoundFile())
                {
                    cfsReader = new CompoundFileReader(Directory(), segment + "." + IndexFileNames.COMPOUND_FILE_EXTENSION, readBufferSize);
                    cfsDir    = cfsReader;
                }

                Directory storeDir;

                if (doOpenStores)
                {
                    if (si.GetDocStoreOffset() != -1)
                    {
                        if (si.GetDocStoreIsCompoundFile())
                        {
                            storeCFSReader = new CompoundFileReader(Directory(), si.GetDocStoreSegment() + "." + IndexFileNames.COMPOUND_FILE_STORE_EXTENSION, readBufferSize);
                            storeDir       = storeCFSReader;
                        }
                        else
                        {
                            storeDir = Directory();
                        }
                    }
                    else
                    {
                        storeDir = cfsDir;
                    }
                }
                else
                {
                    storeDir = null;
                }

                fieldInfos = new FieldInfos(cfsDir, segment + ".fnm");

                bool anyProx   = false;
                int  numFields = fieldInfos.Size();
                for (int i = 0; !anyProx && i < numFields; i++)
                {
                    if (!fieldInfos.FieldInfo(i).omitTf)
                    {
                        anyProx = true;
                    }
                }

                System.String fieldsSegment;

                if (si.GetDocStoreOffset() != -1)
                {
                    fieldsSegment = si.GetDocStoreSegment();
                }
                else
                {
                    fieldsSegment = segment;
                }

                if (doOpenStores)
                {
                    fieldsReader = new FieldsReader(storeDir, fieldsSegment, fieldInfos, readBufferSize, si.GetDocStoreOffset(), si.docCount);

                    // Verify two sources of "maxDoc" agree:
                    if (si.GetDocStoreOffset() == -1 && fieldsReader.Size() != si.docCount)
                    {
                        throw new CorruptIndexException("doc counts differ for segment " + si.name + ": fieldsReader shows " + fieldsReader.Size() + " but segmentInfo shows " + si.docCount);
                    }
                }

                tis = new TermInfosReader(cfsDir, segment, fieldInfos, readBufferSize);

                LoadDeletedDocs();

                // make sure that all index files have been read or are kept open
                // so that if an index update removes them we'll still have them
                freqStream = cfsDir.OpenInput(segment + ".frq", readBufferSize);
                if (anyProx)
                {
                    proxStream = cfsDir.OpenInput(segment + ".prx", readBufferSize);
                }
                OpenNorms(cfsDir, readBufferSize);

                if (doOpenStores && fieldInfos.HasVectors())
                {
                    // open term vector files only as needed
                    System.String vectorsSegment;
                    if (si.GetDocStoreOffset() != -1)
                    {
                        vectorsSegment = si.GetDocStoreSegment();
                    }
                    else
                    {
                        vectorsSegment = segment;
                    }
                    termVectorsReaderOrig = new TermVectorsReader(storeDir, vectorsSegment, fieldInfos, readBufferSize, si.GetDocStoreOffset(), si.docCount);
                }
                success = true;
            }
            finally
            {
                // With lock-less commits, it's entirely possible (and
                // fine) to hit a FileNotFound exception above.  In
                // this case, we want to explicitly close any subset
                // of things that were opened so that we don't have to
                // wait for a GC to do so.
                if (!success)
                {
                    DoClose();
                }
            }
        }
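One detail worth isolating from the Initialize above: the segment that holds the stored fields is not always the segment being opened, because segments flushed together can share a single doc store. If docStoreOffset is -1 the segment owns its files; otherwise the shared docStoreSegment is used, and the same rule picks the term-vector segment further down. A tiny helper restating that choice (the names follow the code above, but the helper itself is illustrative):

        static class DocStoreSketch
        {
            public static string FieldsSegment(string segment, int docStoreOffset, string docStoreSegment)
            {
                // -1 means "no shared doc store": read <segment>.fdt/.fdx directly.
                return docStoreOffset != -1 ? docStoreSegment : segment;
            }
        }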
Example #40
0
		public virtual void  TestLoadSize()
		{
			FieldsReader reader = new FieldsReader(dir, TEST_SEGMENT_NAME, fieldInfos);
			Document doc;
			
			doc = reader.Doc(0, new AnonymousClassFieldSelector(this));
			IFieldable f1 = doc.GetFieldable(DocHelper.TEXT_FIELD_1_KEY);
			IFieldable f3 = doc.GetFieldable(DocHelper.TEXT_FIELD_3_KEY);
			IFieldable fb = doc.GetFieldable(DocHelper.LAZY_FIELD_BINARY_KEY);
			Assert.IsTrue(f1.IsBinary);
			Assert.IsTrue(!f3.IsBinary);
			Assert.IsTrue(fb.IsBinary);
			AssertSizeEquals(2 * DocHelper.FIELD_1_TEXT.Length, f1.GetBinaryValue());
			Assert.AreEqual(DocHelper.FIELD_3_TEXT, f3.StringValue);
            AssertSizeEquals(DocHelper.LAZY_FIELD_BINARY_BYTES.Length, fb.GetBinaryValue());

            reader.Dispose();
		}
Example #41
0
        /// <summary>Merges stored fields from all of the readers into the new segment.</summary>
        /// <returns> The number of documents in all of the readers
        /// </returns>
        /// <throws>  CorruptIndexException if the index is corrupt </throws>
        /// <throws>  IOException if there is a low-level IO error </throws>
        private int MergeFields()
        {
            if (!mergeDocStores)
            {
                // When we are not merging by doc stores, their field
                // name -> number mapping are the same.  So, we start
                // with the fieldInfos of the last segment in this
                // case, to keep that numbering.
                SegmentReader sr = (SegmentReader)readers[readers.Count - 1];
                fieldInfos = (FieldInfos)sr.core.fieldInfos.Clone();
            }
            else
            {
                fieldInfos = new FieldInfos(); // merge field names
            }

            foreach (IndexReader reader in readers)
            {
                if (reader is SegmentReader)
                {
                    SegmentReader segmentReader       = (SegmentReader)reader;
                    FieldInfos    readerFieldInfos    = segmentReader.FieldInfos();
                    int           numReaderFieldInfos = readerFieldInfos.Size();
                    for (int j = 0; j < numReaderFieldInfos; j++)
                    {
                        FieldInfo fi = readerFieldInfos.FieldInfo(j);
                        fieldInfos.Add(fi.name, fi.isIndexed, fi.storeTermVector, fi.storePositionWithTermVector, fi.storeOffsetWithTermVector, !reader.HasNorms(fi.name), fi.storePayloads, fi.omitTermFreqAndPositions);
                    }
                }
                else
                {
                    AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.TERMVECTOR_WITH_POSITION_OFFSET), true, true, true, false, false);
                    AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.TERMVECTOR_WITH_POSITION), true, true, false, false, false);
                    AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.TERMVECTOR_WITH_OFFSET), true, false, true, false, false);
                    AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.TERMVECTOR), true, false, false, false, false);
                    AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.OMIT_TERM_FREQ_AND_POSITIONS), false, false, false, false, true);
                    AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.STORES_PAYLOADS), false, false, false, true, false);
                    AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.INDEXED), false, false, false, false, false);
                    fieldInfos.Add(reader.GetFieldNames(FieldOption.UNINDEXED), false);
                }
            }
            fieldInfos.Write(directory, segment + ".fnm");

            int docCount = 0;

            SetMatchingSegmentReaders();

            if (mergeDocStores)
            {
                // merge field values
                FieldsWriter fieldsWriter = new FieldsWriter(directory, segment, fieldInfos);

                try
                {
                    int idx = 0;
                    foreach (IndexReader reader in readers)
                    {
                        SegmentReader matchingSegmentReader = matchingSegmentReaders[idx++];
                        FieldsReader  matchingFieldsReader  = null;
                        if (matchingSegmentReader != null)
                        {
                            FieldsReader fieldsReader = matchingSegmentReader.GetFieldsReader();
                            if (fieldsReader != null && fieldsReader.CanReadRawDocs())
                            {
                                matchingFieldsReader = fieldsReader;
                            }
                        }
                        if (reader.HasDeletions)
                        {
                            docCount += CopyFieldsWithDeletions(fieldsWriter, reader, matchingFieldsReader);
                        }
                        else
                        {
                            docCount += CopyFieldsNoDeletions(fieldsWriter, reader, matchingFieldsReader);
                        }
                    }
                }
                finally
                {
                    fieldsWriter.Dispose();
                }

                System.String fileName      = segment + "." + IndexFileNames.FIELDS_INDEX_EXTENSION;
                long          fdxFileLength = directory.FileLength(fileName);

                if (4 + ((long)docCount) * 8 != fdxFileLength)
                {
                    // This is most likely a bug in Sun JRE 1.6.0_04/_05;
                    // we detect that the bug has struck, here, and
                    // throw an exception to prevent the corruption from
                    // entering the index.  See LUCENE-1282 for
                    // details.
                    throw new System.SystemException("mergeFields produced an invalid result: docCount is " + docCount + " but fdx file size is " + fdxFileLength + " file=" + fileName + " file exists?=" + directory.FileExists(fileName) + "; now aborting this merge to prevent index corruption");
                }
            }
            // If we are skipping the doc stores, that means there
            // are no deletions in any of these segments, so we
            // just sum numDocs() of each segment to get total docCount
            else
            {
                foreach (IndexReader reader in readers)
                {
                    docCount += reader.NumDocs();
                }
            }

            return(docCount);
        }
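The fdx length check above treats the .fdx file as a 4-byte header followed by one 8-byte pointer per document, so its size is fully determined by docCount; for docCount = 1000 the expected length is 4 + 1000 * 8 = 8004 bytes. The check in isolation (FdxCheckSketch is illustrative, not the actual method):

        using System;

        static class FdxCheckSketch
        {
            public static void Verify(int docCount, long fdxFileLength, string fileName)
            {
                long expected = 4 + (long)docCount * 8;   // header + one pointer per doc
                if (expected != fdxFileLength)
                    throw new InvalidOperationException(
                        "mergeFields produced an invalid result: docCount is " + docCount +
                        " but fdx file size is " + fdxFileLength + " file=" + fileName +
                        "; now aborting this merge to prevent index corruption");
            }
        }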
Example #42
0
        private void InitBlock(FieldsReader enclosingInstance)
        {
            this.Enclosing_Instance = enclosingInstance;
        }
        private void Initialize(SegmentInfo si, int readBufferSize, bool doOpenStores)
        {
            segment = si.name;
            this.si = si;
            this.readBufferSize = readBufferSize;

            bool success = false;

            try
            {
                // Use compound file directory for some files, if it exists
                Directory cfsDir = Directory();
                if (si.GetUseCompoundFile())
                {
                    cfsReader = new CompoundFileReader(Directory(), segment + "." + IndexFileNames.COMPOUND_FILE_EXTENSION, readBufferSize);
                    cfsDir = cfsReader;
                }

                Directory storeDir;

                if (doOpenStores)
                {
                    if (si.GetDocStoreOffset() != -1)
                    {
                        if (si.GetDocStoreIsCompoundFile())
                        {
                            storeCFSReader = new CompoundFileReader(Directory(), si.GetDocStoreSegment() + "." + IndexFileNames.COMPOUND_FILE_STORE_EXTENSION, readBufferSize);
                            storeDir = storeCFSReader;
                        }
                        else
                        {
                            storeDir = Directory();
                        }
                    }
                    else
                    {
                        storeDir = cfsDir;
                    }
                }
                else
                    storeDir = null;

                fieldInfos = new FieldInfos(cfsDir, segment + ".fnm");

                bool anyProx = false;
                int numFields = fieldInfos.Size();
                for (int i = 0; !anyProx && i < numFields; i++)
                    if (!fieldInfos.FieldInfo(i).omitTf)
                        anyProx = true;

                System.String fieldsSegment;

                if (si.GetDocStoreOffset() != -1)
                    fieldsSegment = si.GetDocStoreSegment();
                else
                    fieldsSegment = segment;

                if (doOpenStores)
                {
                    fieldsReader = new FieldsReader(storeDir, fieldsSegment, fieldInfos, readBufferSize, si.GetDocStoreOffset(), si.docCount);

                    // Verify two sources of "maxDoc" agree:
                    if (si.GetDocStoreOffset() == -1 && fieldsReader.Size() != si.docCount)
                    {
                        throw new CorruptIndexException("doc counts differ for segment " + si.name + ": fieldsReader shows " + fieldsReader.Size() + " but segmentInfo shows " + si.docCount);
                    }
                }

                tis = new TermInfosReader(cfsDir, segment, fieldInfos, readBufferSize);

                LoadDeletedDocs();

                // make sure that all index files have been read or are kept open
                // so that if an index update removes them we'll still have them
                freqStream = cfsDir.OpenInput(segment + ".frq", readBufferSize);
                if (anyProx)
                    proxStream = cfsDir.OpenInput(segment + ".prx", readBufferSize);
                OpenNorms(cfsDir, readBufferSize);

                if (doOpenStores && fieldInfos.HasVectors())
                {
                    // open term vector files only as needed
                    System.String vectorsSegment;
                    if (si.GetDocStoreOffset() != -1)
                        vectorsSegment = si.GetDocStoreSegment();
                    else
                        vectorsSegment = segment;
                    termVectorsReaderOrig = new TermVectorsReader(storeDir, vectorsSegment, fieldInfos, readBufferSize, si.GetDocStoreOffset(), si.docCount);
                }
                success = true;
            }
            finally
            {

                // With lock-less commits, it's entirely possible (and
                // fine) to hit a FileNotFound exception above.  In
                // this case, we want to explicitly close any subset
                // of things that were opened so that we don't have to
                // wait for a GC to do so.
                if (!success)
                {
                    DoClose();
                }
            }
        }
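A final note on the Initialize variants above: the .prx (positions) stream is only opened when at least one field keeps term frequencies and positions, i.e. does not set omitTf; that is all the anyProx scan computes. The scan reduced to a sketch (FieldMeta is a hypothetical stand-in for FieldInfo):

        using System.Linq;

        record FieldMeta(string Name, bool OmitTf);

        static class ProxSketch
        {
            // True if any field still records positions, so <segment>.prx exists.
            public static bool AnyProx(FieldMeta[] fields) => fields.Any(f => !f.OmitTf);
        }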