public override void  SetUp()
		{
			base.SetUp();
			DocHelper.SetupDoc(testDoc);
			SegmentInfo info = DocHelper.WriteDoc(dir, testDoc);
			reader = SegmentReader.Get(info);
		}
        public virtual void TestDocument()
        {
            SegmentReader[] readers = new SegmentReader[2];
            Directory dir = NewDirectory();
            Document doc1 = new Document();
            Document doc2 = new Document();
            DocHelper.SetupDoc(doc1);
            DocHelper.SetupDoc(doc2);
            DocHelper.WriteDoc(Random(), dir, doc1);
            DocHelper.WriteDoc(Random(), dir, doc2);
            DirectoryReader reader = DirectoryReader.Open(dir);
            Assert.IsTrue(reader != null);
            Assert.IsTrue(reader is StandardDirectoryReader);

            Document newDoc1 = reader.Document(0);
            Assert.IsTrue(newDoc1 != null);
            Assert.IsTrue(DocHelper.NumFields(newDoc1) == DocHelper.NumFields(doc1) - DocHelper.Unstored.Count);
            Document newDoc2 = reader.Document(1);
            Assert.IsTrue(newDoc2 != null);
            Assert.IsTrue(DocHelper.NumFields(newDoc2) == DocHelper.NumFields(doc2) - DocHelper.Unstored.Count);
            Terms vector = reader.GetTermVectors(0).Terms(DocHelper.TEXT_FIELD_2_KEY);
            Assert.IsNotNull(vector);

            reader.Dispose();
            if (readers[0] != null)
            {
                readers[0].Dispose();
            }
            if (readers[1] != null)
            {
                readers[1].Dispose();
            }
            dir.Dispose();
        }
        static void CreateOpenBitSets(SegmentReader reader, IDictionary<string, HashSet<string>> feeds, IDictionary<string, IDictionary<string, OpenBitSet>> bitSetLookup)
        {
            for (int n = 0; n < reader.MaxDoc; n++)
            {
                if (reader.IsDeleted(n))
                {
                    continue;
                }

                Document document = reader.Document(n);

                string id = document.Get("Id");

                if (id == null)
                {
                    continue;
                }

                foreach (var feed in feeds)
                {
                    if (feed.Value.Contains(id))
                    {
                        bitSetLookup[feed.Key][reader.SegmentName].Set(n);
                    }
                }
            }
        }
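A minimal hedged sketch of how a caller might seed the bitSetLookup argument before invoking the helper above; the feed contents, sizing, and wiring are illustrative assumptions, not taken from the original code.

 // Sketch only: one OpenBitSet per feed, sized to the segment and keyed by segment name.
 // Assumes Lucene.Net 3.x, where OpenBitSet lives in Lucene.Net.Util.
 var feeds = new Dictionary<string, HashSet<string>>
 {
     { "news", new HashSet<string> { "doc-1", "doc-7" } } // hypothetical feed -> document Ids
 };
 var bitSetLookup = new Dictionary<string, IDictionary<string, OpenBitSet>>();
 foreach (var feed in feeds)
 {
     bitSetLookup[feed.Key] = new Dictionary<string, OpenBitSet>
     {
         { reader.SegmentName, new OpenBitSet(reader.MaxDoc) }
     };
 }
 CreateOpenBitSets(reader, feeds, bitSetLookup);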
 protected internal override object DoBody(string segmentFileName)
 {
     var sis = new SegmentInfos();
     sis.Read(directory, segmentFileName);
     var readers = new SegmentReader[sis.Size()];
     for (int i = sis.Size() - 1; i >= 0; i--)
     {
         System.IO.IOException prior = null;
         bool success = false;
         try
         {
             readers[i] = new SegmentReader(sis.Info(i), termInfosIndexDivisor, IOContext.READ);
             success = true;
         }
         catch (System.IO.IOException ex)
         {
             prior = ex;
         }
         finally
         {
             if (!success)
             {
                 IOUtils.CloseWhileHandlingException(prior, readers);
             }
         }
     }
     return new StandardDirectoryReader(directory, readers, null, sis, termInfosIndexDivisor, false);
 }
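For context, DoBody above is the retry callback of SegmentInfos.FindSegmentsFile: Run() resolves the newest segments_N file and passes its name to DoBody, retrying when a concurrent commit swaps the file out. A hedged usage sketch, with a hypothetical subclass name standing in for the enclosing type:

 // Sketch only: OpenReaderHelper is an illustrative name for the FindSegmentsFile
 // subclass that contains the DoBody override above.
 var reader = (DirectoryReader)new OpenReaderHelper(directory).Run();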
Example #5
			public Norm(SegmentReader enclosingInstance, IndexInput in_Renamed, int number, long normSeek)
			{
				InitBlock(enclosingInstance);
				this.in_Renamed = in_Renamed;
				this.number = number;
				this.normSeek = normSeek;
			}
		protected internal AllTermDocs(SegmentReader parent) : base(parent.MaxDoc)
		{
			lock (parent)
			{
				this.deletedDocs = parent.deletedDocs;
			}
		}
Example #7
		public virtual void  TestBadSeek()
		{
			try
			{
				//After adding the document, we should be able to read it back in
				SegmentReader reader = new SegmentReader(new SegmentInfo("test", 3, dir));
				Assert.IsTrue(reader != null);
				SegmentTermDocs segTermDocs = new SegmentTermDocs(reader);
				Assert.IsTrue(segTermDocs != null);
				segTermDocs.Seek(new Term("textField2", "bad"));
				Assert.IsTrue(segTermDocs.Next() == false);
				reader.Close();
			}
			catch (System.IO.IOException e)
			{
				Assert.Fail(e.Message);
			}
			try
			{
				//After adding the document, we should be able to read it back in
				SegmentReader reader = new SegmentReader(new SegmentInfo("test", 3, dir));
				Assert.IsTrue(reader != null);
				SegmentTermDocs segTermDocs = new SegmentTermDocs(reader);
				Assert.IsTrue(segTermDocs != null);
				segTermDocs.Seek(new Term("junk", "bad"));
				Assert.IsTrue(segTermDocs.Next() == false);
				reader.Close();
			}
			catch (System.IO.IOException e)
			{
				Assert.Fail(e.Message);
			}
		}
        private int[] starts; // 1st docno for each segment

        #endregion Fields

        #region Constructors

        /// <summary>Construct reading the named set of readers. </summary>
        internal MultiSegmentReader(Directory directory, SegmentInfos sis, bool closeDirectory, bool readOnly)
            : base(directory, sis, closeDirectory, readOnly)
        {
            // To reduce the chance of hitting FileNotFound
            // (and having to retry), we open segments in
            // reverse because IndexWriter merges & deletes
            // the newest segments first.

            SegmentReader[] readers = new SegmentReader[sis.Count];
            for (int i = sis.Count - 1; i >= 0; i--)
            {
                try
                {
                    readers[i] = SegmentReader.Get(readOnly, sis.Info(i));
                }
                catch (System.IO.IOException e)
                {
                    // Close all readers we had opened:
                    for (i++; i < sis.Count; i++)
                    {
                        try
                        {
                            readers[i].Close();
                        }
                        catch (System.IO.IOException)
                        {
                            // keep going - we want to clean up as much as possible
                        }
                    }
                    throw e;
                }
            }

            Initialize(readers);
        }
Example #9
 public override void  SetUp()
 {
     base.SetUp();
     DocHelper.SetupDoc(testDoc);
     SegmentInfo info = DocHelper.WriteDoc(dir, testDoc);
     reader = SegmentReader.Get(true, info, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR);
 }
Example #10
		public SegmentTermDocs(SegmentReader parent)
		{
			this.parent = parent;
			this.freqStream = (IndexInput) parent.freqStream.Clone();
			this.deletedDocs = parent.deletedDocs;
			this.skipInterval = parent.tis.GetSkipInterval();
		}
Example #11
 protected internal AllTermDocs(SegmentReader parent)
 {
     lock (parent)
     {
         this.deletedDocs = parent.deletedDocs;
     }
     this.maxDoc = parent.MaxDoc();
 }
 public override void SetUp()
 {
     base.SetUp();
     Dir = NewDirectory();
     DocHelper.SetupDoc(TestDoc);
     SegmentCommitInfo info = DocHelper.WriteDoc(Random(), Dir, TestDoc);
     Reader = new SegmentReader(info, DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR, IOContext.READ);
 }
Example #13
 public override void  SetUp()
 {
     base.SetUp();
     DocHelper.SetupDoc(doc1);
     SegmentInfo info1 = DocHelper.WriteDoc(merge1Dir, doc1);
     DocHelper.SetupDoc(doc2);
     SegmentInfo info2 = DocHelper.WriteDoc(merge2Dir, doc2);
     reader1 = SegmentReader.Get(true, info1, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR);
     reader2 = SegmentReader.Get(true, info2, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR);
 }
Example #14
		public override void  SetUp()
		{
			base.SetUp();
			DocHelper.SetupDoc(doc1);
			SegmentInfo info1 = DocHelper.WriteDoc(merge1Dir, doc1);
			DocHelper.SetupDoc(doc2);
			SegmentInfo info2 = DocHelper.WriteDoc(merge2Dir, doc2);
			reader1 = SegmentReader.Get(info1);
			reader2 = SegmentReader.Get(info2);
		}
Example #15
		public /*protected internal*/ SegmentTermDocs(SegmentReader parent)
		{
			this.parent = parent;
			this.freqStream = (IndexInput) parent.core.freqStream.Clone();
			lock (parent)
			{
				this.deletedDocs = parent.deletedDocs;
			}
			this.skipInterval = parent.core.GetTermsReader().GetSkipInterval();
			this.maxSkipLevels = parent.core.GetTermsReader().GetMaxSkipLevels();
		}
Example #16
 /*protected internal*/
 public SegmentTermDocs(SegmentReader parent)
 {
     this.parent = parent;
     this.freqStream = (IndexInput) parent.core.freqStream.Clone();
     lock (parent)
     {
         this.deletedDocs = parent.deletedDocs;
     }
     this.skipInterval = parent.core.GetTermsReader().SkipInterval;
     this.maxSkipLevels = parent.core.GetTermsReader().MaxSkipLevels;
 }
 public override void SetUp()
 {
     base.SetUp();
     MergedDir = NewDirectory();
     Merge1Dir = NewDirectory();
     Merge2Dir = NewDirectory();
     DocHelper.SetupDoc(Doc1);
     SegmentCommitInfo info1 = DocHelper.WriteDoc(Random(), Merge1Dir, Doc1);
     DocHelper.SetupDoc(Doc2);
     SegmentCommitInfo info2 = DocHelper.WriteDoc(Random(), Merge2Dir, Doc2);
     Reader1 = new SegmentReader(info1, DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR, NewIOContext(Random()));
     Reader2 = new SegmentReader(info2, DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR, NewIOContext(Random()));
 }
Example #18
		protected virtual void  SetUp()
		{
			
			try
			{
				DocHelper.SetupDoc(testDoc);
				DocHelper.WriteDoc(dir, testDoc);
				reader = new SegmentReader(new SegmentInfo("test", 1, dir));
			}
			catch (System.IO.IOException)
			{
				// swallowed in this legacy setup; a failure here surfaces later when the reader is used
			}
		}
        public virtual void TestAddDocument()
        {
            Document testDoc = new Document();
            DocHelper.SetupDoc(testDoc);
            IndexWriter writer = new IndexWriter(Dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));
            writer.AddDocument(testDoc);
            writer.Commit();
            SegmentCommitInfo info = writer.NewestSegment();
            writer.Dispose();
            //After adding the document, we should be able to read it back in
            SegmentReader reader = new SegmentReader(info, DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR, NewIOContext(Random()));
            Assert.IsTrue(reader != null);
            Document doc = reader.Document(0);
            Assert.IsTrue(doc != null);

            //System.out.println("Document: " + doc);
            IndexableField[] fields = doc.GetFields("textField2");
            Assert.IsTrue(fields != null && fields.Length == 1);
            Assert.IsTrue(fields[0].StringValue.Equals(DocHelper.FIELD_2_TEXT));
            Assert.IsTrue(fields[0].FieldType().StoreTermVectors);

            fields = doc.GetFields("textField1");
            Assert.IsTrue(fields != null && fields.Length == 1);
            Assert.IsTrue(fields[0].StringValue.Equals(DocHelper.FIELD_1_TEXT));
            Assert.IsFalse(fields[0].FieldType().StoreTermVectors);

            fields = doc.GetFields("keyField");
            Assert.IsTrue(fields != null && fields.Length == 1);
            Assert.IsTrue(fields[0].StringValue.Equals(DocHelper.KEYWORD_TEXT));

            fields = doc.GetFields(DocHelper.NO_NORMS_KEY);
            Assert.IsTrue(fields != null && fields.Length == 1);
            Assert.IsTrue(fields[0].StringValue.Equals(DocHelper.NO_NORMS_TEXT));

            fields = doc.GetFields(DocHelper.TEXT_FIELD_3_KEY);
            Assert.IsTrue(fields != null && fields.Length == 1);
            Assert.IsTrue(fields[0].StringValue.Equals(DocHelper.FIELD_3_TEXT));

            // test that the norms are not present in the segment if
            // omitNorms is true
            foreach (FieldInfo fi in reader.FieldInfos)
            {
                if (fi.Indexed)
                {
                    Assert.IsTrue(fi.OmitsNorms() == (reader.GetNormValues(fi.Name) == null));
                }
            }
            reader.Dispose();
        }
Example #20
		internal static void  Merge(System.String seg1, System.String seg2, System.String merged)
		{
			Directory directory = FSDirectory.GetDirectory("test", false);
			
			SegmentReader r1 = new SegmentReader(new SegmentInfo(seg1, 1, directory));
			SegmentReader r2 = new SegmentReader(new SegmentInfo(seg2, 1, directory));
			
			SegmentMerger merger = new SegmentMerger(directory, merged, false);
			merger.Add(r1);
			merger.Add(r2);
			merger.Merge();
			merger.CloseReaders();
			
			directory.Close();
		}
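A hedged usage sketch for the helper above; the segment names are illustrative and assume both one-document segments were already written to the "test" directory it opens:

		// Merge the two existing segments into a new segment named "merged".
		Merge("seg-1", "seg-2", "merged");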
		// This is needed for the test to pass and to mimic what happens with JUnit
		// For some reason, JUnit is creating a new member variable for each sub-test
		// but NUnit is not -- who is wrong/right, I don't know.
		private void SetUpInternal()        // {{Aroush-1.9}} See note above
		{
			//The variables for the new merged segment
			mergedDir = new RAMDirectory();
			mergedSegment = "test";
			//First segment to be merged
			merge1Dir = new RAMDirectory();
			doc1 = new Lucene.Net.Documents.Document();
			//merge1Segment = "test-1";
			reader1 = null;
			//Second Segment to be merged
			merge2Dir = new RAMDirectory();
			doc2 = new Lucene.Net.Documents.Document();
			//merge2Segment = "test-2";
			reader2 = null;
		}
Example #22
		protected virtual void  SetUp()
		{
			DocHelper.SetupDoc(doc1);
			DocHelper.WriteDoc(merge1Dir, merge1Segment, doc1);
			DocHelper.SetupDoc(doc2);
			DocHelper.WriteDoc(merge2Dir, merge2Segment, doc2);
			try
			{
				reader1 = new SegmentReader(new SegmentInfo(merge1Segment, 1, merge1Dir));
				reader2 = new SegmentReader(new SegmentInfo(merge2Segment, 1, merge2Dir));
			}
			catch (System.IO.IOException e)
			{
                System.Console.Error.WriteLine(e.StackTrace);
			}
		}
        public virtual void TestTermDocs(int indexDivisor)
        {
            //After adding the document, we should be able to read it back in
            SegmentReader reader = new SegmentReader(Info, indexDivisor, NewIOContext(Random()));
            Assert.IsTrue(reader != null);
            Assert.AreEqual(indexDivisor, reader.TermInfosIndexDivisor);

            TermsEnum terms = reader.Fields.Terms(DocHelper.TEXT_FIELD_2_KEY).Iterator(null);
            terms.SeekCeil(new BytesRef("field"));
            DocsEnum termDocs = TestUtil.Docs(Random(), terms, reader.LiveDocs, null, DocsEnum.FLAG_FREQS);
            if (termDocs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
            {
                int docId = termDocs.DocID();
                Assert.IsTrue(docId == 0);
                int freq = termDocs.Freq();
                Assert.IsTrue(freq == 3);
            }
            reader.Dispose();
        }
Example #24
		protected virtual void  SetUp()
		{
			DocHelper.SetupDoc(doc1);
			DocHelper.SetupDoc(doc2);
			DocHelper.WriteDoc(dir, "seg-1", doc1);
			DocHelper.WriteDoc(dir, "seg-2", doc2);
			
			try
			{
				sis.Write(dir);
				reader1 = new SegmentReader(new SegmentInfo("seg-1", 1, dir));
				reader2 = new SegmentReader(new SegmentInfo("seg-2", 1, dir));
				readers[0] = reader1;
				readers[1] = reader2;
			}
			catch (System.IO.IOException e)
			{
                System.Console.Error.WriteLine(e.StackTrace);
			}
		}
        public virtual void TestBadSeek(int indexDivisor)
        {
            {
                //After adding the document, we should be able to read it back in
                SegmentReader reader = new SegmentReader(Info, indexDivisor, NewIOContext(Random()));
                Assert.IsTrue(reader != null);
                DocsEnum termDocs = TestUtil.Docs(Random(), reader, "textField2", new BytesRef("bad"), reader.LiveDocs, null, 0);

                Assert.IsNull(termDocs);
                reader.Dispose();
            }
            {
                //After adding the document, we should be able to read it back in
                SegmentReader reader = new SegmentReader(Info, indexDivisor, NewIOContext(Random()));
                Assert.IsTrue(reader != null);
                DocsEnum termDocs = TestUtil.Docs(Random(), reader, "junk", new BytesRef("bad"), reader.LiveDocs, null, 0);
                Assert.IsNull(termDocs);
                reader.Dispose();
            }
        }
Example #26
		public virtual void  TestTermDocs()
		{
			try
			{
				//After adding the document, we should be able to read it back in
				SegmentReader reader = new SegmentReader(new SegmentInfo("test", 1, dir));
				Assert.IsTrue(reader != null);
				SegmentTermDocs segTermDocs = new SegmentTermDocs(reader);
				Assert.IsTrue(segTermDocs != null);
				segTermDocs.Seek(new Term(DocHelper.TEXT_FIELD_2_KEY, "Field"));
				if (segTermDocs.Next() == true)
				{
					int docId = segTermDocs.Doc();
					Assert.IsTrue(docId == 0);
					int freq = segTermDocs.Freq();
					Assert.IsTrue(freq == 3);
				}
				reader.Close();
			}
			catch (System.IO.IOException e)
			{
				Assert.Fail(e.Message);
			}
		}
Example #27
		public virtual void  TestAddDocument()
		{
			Analyzer analyzer = new WhitespaceAnalyzer();
			Similarity similarity = Similarity.GetDefault();
			DocumentWriter writer = new DocumentWriter(dir, analyzer, similarity, 50);
			Assert.IsTrue(writer != null);
			try
			{
				writer.AddDocument("test", testDoc);
				//After adding the document, we should be able to read it back in
				SegmentReader reader = new SegmentReader(new SegmentInfo("test", 1, dir));
				Assert.IsTrue(reader != null);
				Document doc = reader.Document(0);
				Assert.IsTrue(doc != null);
				
				//System.out.println("Document: " + doc);
				Field[] fields = doc.GetFields("textField2");
				Assert.IsTrue(fields != null && fields.Length == 1);
				Assert.IsTrue(fields[0].StringValue().Equals(DocHelper.FIELD_2_TEXT));
				Assert.IsTrue(fields[0].IsTermVectorStored() == true);
				
				fields = doc.GetFields("textField1");
				Assert.IsTrue(fields != null && fields.Length == 1);
				Assert.IsTrue(fields[0].StringValue().Equals(DocHelper.FIELD_1_TEXT));
				Assert.IsTrue(fields[0].IsTermVectorStored() == false);
				
				fields = doc.GetFields("keyField");
				Assert.IsTrue(fields != null && fields.Length == 1);
				Assert.IsTrue(fields[0].StringValue().Equals(DocHelper.KEYWORD_TEXT));
			}
			catch (System.IO.IOException e)
			{
                System.Console.Error.WriteLine(e.StackTrace);
				Assert.Fail(e.Message);
			}
		}
        /// <summary>
        /// Used by near real-time search </summary>
        internal static DirectoryReader Open(IndexWriter writer, SegmentInfos infos, bool applyAllDeletes)
        {
            // IndexWriter synchronizes externally before calling
            // us, which ensures infos will not change; so there's
            // no need to process segments in reverse order
            int numSegments = infos.Count;

            IList <SegmentReader> readers = new List <SegmentReader>();
            Directory             dir     = writer.Directory;

            SegmentInfos segmentInfos = (SegmentInfos)infos.Clone();
            int          infosUpto    = 0;
            bool         success      = false;

            try
            {
                for (int i = 0; i < numSegments; i++)
                {
                    // NOTE: important that we use infos not
                    // segmentInfos here, so that we are passing the
                    // actual instance of SegmentInfoPerCommit in
                    // IndexWriter's segmentInfos:
                    SegmentCommitInfo info = infos.Info(i);
                    Debug.Assert(info.Info.Dir == dir);
                    ReadersAndUpdates rld = writer.readerPool.Get(info, true);
                    try
                    {
                        SegmentReader reader = rld.GetReadOnlyClone(IOContext.READ);
                        if (reader.NumDocs > 0 || writer.KeepFullyDeletedSegments)
                        {
                            // Steal the ref:
                            readers.Add(reader);
                            infosUpto++;
                        }
                        else
                        {
                            reader.DecRef();
                            segmentInfos.Remove(infosUpto);
                        }
                    }
                    finally
                    {
                        writer.readerPool.Release(rld);
                    }
                }

                writer.IncRefDeleter(segmentInfos);

                StandardDirectoryReader result = new StandardDirectoryReader(dir, readers.ToArray(), writer, segmentInfos, writer.Config.ReaderTermsIndexDivisor, applyAllDeletes);
                success = true;
                return(result);
            }
            finally
            {
                if (!success)
                {
                    foreach (SegmentReader r in readers)
                    {
                        try
                        {
                            r.DecRef();
                        }
#pragma warning disable 168
                        catch (Exception th)
#pragma warning restore 168
                        {
                            // ignore any exception that is thrown here to not mask any original
                            // exception.
                        }
                    }
                }
            }
        }
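Applications reach this internal factory through the public near-real-time entry point; a minimal hedged sketch, assuming Lucene.Net 4.8 and an already-open IndexWriter named writer:

        // The bool argument is applyAllDeletes; DirectoryReader.Open(IndexWriter, bool)
        // routes to the internal StandardDirectoryReader.Open above.
        using (DirectoryReader nrtReader = DirectoryReader.Open(writer, true))
        {
            var searcher = new IndexSearcher(nrtReader);
            // ... search the uncommitted, near-real-time view of the index ...
        }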
Example #29
        // Writes field updates (new _X_N updates files) to the directory
        public virtual void WriteFieldUpdates(Directory dir, DocValuesFieldUpdates.Container dvUpdates)
        {
            lock (this)
            {
                //Debug.Assert(Thread.holdsLock(Writer));
                //System.out.println("rld.writeFieldUpdates: seg=" + info + " numericFieldUpdates=" + numericFieldUpdates);

                Debug.Assert(dvUpdates.Any());

                // Do this so we can delete any created files on
                // exception; this saves all codecs from having to do
                // it:
                TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(dir);

                FieldInfos fieldInfos = null;
                bool       success    = false;
                try
                {
                    Codec codec = Info.Info.Codec;

                    // reader could be null e.g. for a just merged segment (from
                    // IndexWriter.commitMergedDeletes).
                    SegmentReader reader = this.reader == null ? new SegmentReader(Info, writer.Config.ReaderTermsIndexDivisor, IOContext.READ_ONCE) : this.reader;
                    try
                    {
                        // clone FieldInfos so that we can update their dvGen separately from
                        // the reader's infos and write them to a new fieldInfos_gen file
                        FieldInfos.Builder builder = new FieldInfos.Builder(writer.globalFieldNumberMap);
                        // cannot use builder.add(reader.getFieldInfos()) because it does not
                        // clone FI.attributes as well FI.dvGen
                        foreach (FieldInfo fi in reader.FieldInfos)
                        {
                            FieldInfo clone = builder.Add(fi);
                            // copy the stuff FieldInfos.Builder doesn't copy
                            if (fi.Attributes != null)
                            {
                                foreach (KeyValuePair <string, string> e in fi.Attributes)
                                {
                                    clone.PutAttribute(e.Key, e.Value);
                                }
                            }
                            clone.DocValuesGen = fi.DocValuesGen;
                        }
                        // create new fields or update existing ones to have NumericDV type
                        foreach (string f in dvUpdates.numericDVUpdates.Keys)
                        {
                            builder.AddOrUpdate(f, NumericDocValuesField.TYPE);
                        }
                        // create new fields or update existing ones to have BinaryDV type
                        foreach (string f in dvUpdates.binaryDVUpdates.Keys)
                        {
                            builder.AddOrUpdate(f, BinaryDocValuesField.fType);
                        }

                        fieldInfos = builder.Finish();
                        long              nextFieldInfosGen     = Info.NextFieldInfosGen;
                        string            segmentSuffix         = nextFieldInfosGen.ToString(CultureInfo.InvariantCulture);//Convert.ToString(nextFieldInfosGen, Character.MAX_RADIX));
                        SegmentWriteState state                 = new SegmentWriteState(null, trackingDir, Info.Info, fieldInfos, writer.Config.TermIndexInterval, null, IOContext.DEFAULT, segmentSuffix);
                        DocValuesFormat   docValuesFormat       = codec.DocValuesFormat;
                        DocValuesConsumer fieldsConsumer        = docValuesFormat.FieldsConsumer(state);
                        bool              fieldsConsumerSuccess = false;
                        try
                        {
                            //          System.out.println("[" + Thread.currentThread().getName() + "] RLD.writeFieldUpdates: applying numeric updates; seg=" + info + " updates=" + numericFieldUpdates);
                            foreach (KeyValuePair <string, NumericDocValuesFieldUpdates> e in dvUpdates.numericDVUpdates)
                            {
                                string field = e.Key;
                                NumericDocValuesFieldUpdates fieldUpdates = e.Value;
                                FieldInfo fieldInfo = fieldInfos.FieldInfo(field);
                                Debug.Assert(fieldInfo != null);

                                fieldInfo.DocValuesGen = nextFieldInfosGen;
                                // write the numeric updates to a new gen'd docvalues file
                                fieldsConsumer.AddNumericField(fieldInfo, GetInt64Enumerable(reader, field, fieldUpdates));
                            }

                            //        System.out.println("[" + Thread.currentThread().getName() + "] RAU.writeFieldUpdates: applying binary updates; seg=" + info + " updates=" + dvUpdates.binaryDVUpdates);
                            foreach (KeyValuePair <string, BinaryDocValuesFieldUpdates> e in dvUpdates.binaryDVUpdates)
                            {
                                string field = e.Key;
                                BinaryDocValuesFieldUpdates dvFieldUpdates = e.Value;
                                FieldInfo fieldInfo = fieldInfos.FieldInfo(field);
                                Debug.Assert(fieldInfo != null);

                                //          System.out.println("[" + Thread.currentThread().getName() + "] RAU.writeFieldUpdates: applying binary updates; seg=" + info + " f=" + dvFieldUpdates + ", updates=" + dvFieldUpdates);

                                fieldInfo.DocValuesGen = nextFieldInfosGen;
                                // write the numeric updates to a new gen'd docvalues file
                                fieldsConsumer.AddBinaryField(fieldInfo, GetBytesRefEnumerable(reader, field, dvFieldUpdates));
                            }

                            codec.FieldInfosFormat.FieldInfosWriter.Write(trackingDir, Info.Info.Name, segmentSuffix, fieldInfos, IOContext.DEFAULT);
                            fieldsConsumerSuccess = true;
                        }
                        finally
                        {
                            if (fieldsConsumerSuccess)
                            {
                                fieldsConsumer.Dispose();
                            }
                            else
                            {
                                IOUtils.DisposeWhileHandlingException(fieldsConsumer);
                            }
                        }
                    }
                    finally
                    {
                        if (reader != this.reader)
                        {
                            //          System.out.println("[" + Thread.currentThread().getName() + "] RLD.writeLiveDocs: closeReader " + reader);
                            reader.Dispose();
                        }
                    }

                    success = true;
                }
                finally
                {
                    if (!success)
                    {
                        // Advance only the nextWriteDocValuesGen so that a 2nd
                        // attempt to write will write to a new file
                        Info.AdvanceNextWriteFieldInfosGen();

                        // Delete any partially created file(s):
                        foreach (string fileName in trackingDir.CreatedFiles)
                        {
                            try
                            {
                                dir.DeleteFile(fileName);
                            }
                            catch (Exception)
                            {
                                // Ignore so we throw only the first exc
                            }
                        }
                    }
                }

                Info.AdvanceFieldInfosGen();
                // copy all the updates to mergingUpdates, so they can later be applied to the merged segment
                if (isMerging)
                {
                    foreach (KeyValuePair <string, NumericDocValuesFieldUpdates> e in dvUpdates.numericDVUpdates)
                    {
                        DocValuesFieldUpdates updates;
                        if (!mergingDVUpdates.TryGetValue(e.Key, out updates))
                        {
                            mergingDVUpdates[e.Key] = e.Value;
                        }
                        else
                        {
                            updates.Merge(e.Value);
                        }
                    }
                    foreach (KeyValuePair <string, BinaryDocValuesFieldUpdates> e in dvUpdates.binaryDVUpdates)
                    {
                        DocValuesFieldUpdates updates;
                        if (!mergingDVUpdates.TryGetValue(e.Key, out updates))
                        {
                            mergingDVUpdates[e.Key] = e.Value;
                        }
                        else
                        {
                            updates.Merge(e.Value);
                        }
                    }
                }

                // create a new map, keeping only the gens that are in use
                IDictionary <long, ISet <string> > genUpdatesFiles    = Info.UpdatesFiles;
                IDictionary <long, ISet <string> > newGenUpdatesFiles = new Dictionary <long, ISet <string> >();
                long fieldInfosGen = Info.FieldInfosGen;
                foreach (FieldInfo fi in fieldInfos)
                {
                    long dvGen = fi.DocValuesGen;
                    if (dvGen != -1 && !newGenUpdatesFiles.ContainsKey(dvGen))
                    {
                        if (dvGen == fieldInfosGen)
                        {
                            newGenUpdatesFiles[fieldInfosGen] = trackingDir.CreatedFiles;
                        }
                        else
                        {
                            newGenUpdatesFiles[dvGen] = genUpdatesFiles[dvGen];
                        }
                    }
                }

                Info.SetGenUpdatesFiles(newGenUpdatesFiles);

                // wrote new files, should checkpoint()
                writer.Checkpoint();

                // if there is a reader open, reopen it to reflect the updates
                if (reader != null)
                {
                    SegmentReader newReader = new SegmentReader(Info, reader, liveDocs, Info.Info.DocCount - Info.DelCount - pendingDeleteCount);
                    bool          reopened  = false;
                    try
                    {
                        reader.DecRef();
                        reader   = newReader;
                        reopened = true;
                    }
                    finally
                    {
                        if (!reopened)
                        {
                            newReader.DecRef();
                        }
                    }
                }
            }
        }
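From application code, the doc-values updates that eventually reach WriteFieldUpdates are issued through IndexWriter; a hedged sketch with illustrative field and term values:

        // Update a numeric doc-values field in place; on commit the writer resolves the
        // update and a new _X_N doc-values generation is written as shown above.
        writer.UpdateNumericDocValue(new Term("id", "42"), "viewCount", 100L);
        writer.Commit();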
Example #30
 internal MySegmentTermDocs(SegmentReader p) : base(p)
 {
 }
Example #31
 internal SegmentTermPositions(SegmentReader p) : base(p)
 {
     this.proxStream = null;             // the proxStream will be cloned lazily when nextPosition() is called for the first time
 }
 public override void Run()
 {
     if (VERBOSE)
     {
         Console.WriteLine(Thread.CurrentThread.Name + ": launch search thread");
     }
     while (Environment.TickCount < stopTimeMS)
     {
         try
         {
             IndexSearcher s = outerInstance.CurrentSearcher;
             try
             {
                 // Verify 1) IW is correctly setting
                 // diagnostics, and 2) segment warming for
                 // merged segments is actually happening:
                 foreach (AtomicReaderContext sub in s.IndexReader.Leaves)
                 {
                     SegmentReader segReader = (SegmentReader)sub.Reader;
                     IDictionary <string, string> diagnostics = segReader.SegmentInfo.Info.Diagnostics;
                     assertNotNull(diagnostics);
                     string source;
                     diagnostics.TryGetValue("source", out source);
                     assertNotNull(source);
                     if (source.Equals("merge", StringComparison.Ordinal))
                     {
                         assertTrue("sub reader " + sub + " wasn't warmed: warmed=" + outerInstance.warmed + " diagnostics=" + diagnostics + " si=" + segReader.SegmentInfo,
                                    !outerInstance.assertMergedSegmentsWarmed || outerInstance.warmed.ContainsKey(segReader.core));
                     }
                 }
                 if (s.IndexReader.NumDocs > 0)
                 {
                     outerInstance.SmokeTestSearcher(s);
                     Fields fields = MultiFields.GetFields(s.IndexReader);
                     if (fields == null)
                     {
                         continue;
                     }
                     Terms terms = fields.GetTerms("body");
                     if (terms == null)
                     {
                         continue;
                     }
                     TermsEnum termsEnum     = terms.GetIterator(null);
                     int       seenTermCount = 0;
                     int       shift;
                     int       trigger;
                     if (totTermCount.Get() < 30)
                     {
                         shift   = 0;
                         trigger = 1;
                     }
                     else
                     {
                         trigger = totTermCount.Get() / 30;
                         shift   = Random().Next(trigger);
                     }
                     while (Environment.TickCount < stopTimeMS)
                     {
                         BytesRef term = termsEnum.Next();
                         if (term == null)
                         {
                             totTermCount.Set(seenTermCount);
                             break;
                         }
                         seenTermCount++;
                         // search 30 terms
                         if ((seenTermCount + shift) % trigger == 0)
                         {
                             //if (VERBOSE) {
                             //System.out.println(Thread.currentThread().getName() + " now search body:" + term.Utf8ToString());
                             //}
                             totHits.AddAndGet(outerInstance.RunQuery(s, new TermQuery(new Term("body", term))));
                         }
                     }
                     //if (VERBOSE) {
                     //System.out.println(Thread.currentThread().getName() + ": search done");
                     //}
                 }
             }
             finally
             {
                 outerInstance.ReleaseSearcher(s);
             }
         }
         catch (Exception t)
         {
             Console.WriteLine(Thread.CurrentThread.Name + ": hit exc");
             outerInstance.failed.Set(true);
             Console.WriteLine(t.ToString());
             throw new Exception(t.ToString(), t);
         }
     }
 }
        internal SegmentCoreReaders(SegmentReader owner, Directory dir, SegmentCommitInfo si, IOContext context, int termsIndexDivisor)
        {
            fieldsReaderLocal = new DisposableThreadLocal <StoredFieldsReader>(()
                                                                               => (StoredFieldsReader)fieldsReaderOrig.Clone());
            termVectorsLocal = new DisposableThreadLocal <TermVectorsReader>(()
                                                                             => (termVectorsReaderOrig == null) ? null : (TermVectorsReader)termVectorsReaderOrig.Clone());

            if (termsIndexDivisor == 0)
            {
                throw new ArgumentException("indexDivisor must be < 0 (don't load terms index) or greater than 0 (got 0)");
            }

            Codec     codec = si.Info.Codec;
            Directory cfsDir; // confusing name: if (cfs) it's the cfsDir, otherwise it's the segment's directory.

            bool success = false;

            try
            {
                if (si.Info.UseCompoundFile)
                {
                    cfsDir = cfsReader = new CompoundFileDirectory(dir, IndexFileNames.SegmentFileName(si.Info.Name, "", IndexFileNames.COMPOUND_FILE_EXTENSION), context, false);
                }
                else
                {
                    cfsReader = null;
                    cfsDir    = dir;
                }

                FieldInfos fieldInfos = owner.FieldInfos;

                this.termsIndexDivisor = termsIndexDivisor;
                PostingsFormat   format           = codec.PostingsFormat;
                SegmentReadState segmentReadState = new SegmentReadState(cfsDir, si.Info, fieldInfos, context, termsIndexDivisor);
                // Ask codec for its Fields
                fields = format.FieldsProducer(segmentReadState);
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(fields != null);
                }
                // ask codec for its Norms:
                // TODO: since we don't write any norms file if there are no norms,
                // kinda janky to assume the codec handles the case of no norms file at all gracefully?!

                if (fieldInfos.HasNorms)
                {
                    normsProducer = codec.NormsFormat.NormsProducer(segmentReadState);
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(normsProducer != null);
                    }
                }
                else
                {
                    normsProducer = null;
                }

                fieldsReaderOrig = si.Info.Codec.StoredFieldsFormat.FieldsReader(cfsDir, si.Info, fieldInfos, context);

                if (fieldInfos.HasVectors) // open term vector files only as needed
                {
                    termVectorsReaderOrig = si.Info.Codec.TermVectorsFormat.VectorsReader(cfsDir, si.Info, fieldInfos, context);
                }
                else
                {
                    termVectorsReaderOrig = null;
                }

                success = true;
            }
            finally
            {
                if (!success)
                {
                    DecRef();
                }
            }
        }
        internal SegmentCoreReaders(SegmentReader owner, Directory dir, SegmentCommitInfo si, IOContext context, int termsIndexDivisor)
        {
            fieldsReaderLocal = new AnonymousFieldsReaderLocal(this);
            termVectorsLocal  = new AnonymousTermVectorsLocal(this);

            if (termsIndexDivisor == 0)
            {
                throw new System.ArgumentException("indexDivisor must be < 0 (don't load terms index) or greater than 0 (got 0)");
            }

            Codec     codec = si.Info.Codec;
            Directory cfsDir; // confusing name: if (cfs) it's the cfsDir, otherwise it's the segment's directory.

            bool success = false;

            try
            {
                if (si.Info.UseCompoundFile)
                {
                    cfsDir = cfsReader = new CompoundFileDirectory(dir, IndexFileNames.SegmentFileName(si.Info.Name, "", IndexFileNames.COMPOUND_FILE_EXTENSION), context, false);
                }
                else
                {
                    cfsReader = null;
                    cfsDir    = dir;
                }

                FieldInfos fieldInfos = owner.FieldInfos;

                this.termsIndexDivisor = termsIndexDivisor;
                PostingsFormat   format           = codec.PostingsFormat;
                SegmentReadState segmentReadState = new SegmentReadState(cfsDir, si.Info, fieldInfos, context, termsIndexDivisor);
                // Ask codec for its Fields
                fields = format.FieldsProducer(segmentReadState);
                Debug.Assert(fields != null);
                // ask codec for its Norms:
                // TODO: since we don't write any norms file if there are no norms,
                // kinda janky to assume the codec handles the case of no norms file at all gracefully?!

                if (fieldInfos.HasNorms)
                {
                    normsProducer = codec.NormsFormat.NormsProducer(segmentReadState);
                    Debug.Assert(normsProducer != null);
                }
                else
                {
                    normsProducer = null;
                }

                // LUCENENET TODO: EXCEPTIONS Not sure why this catch block is swallowing AccessViolationException,
                // because it didn't exist in Lucene. Is it really needed? AVE is for protected memory...could
                // this be needed because we are using unchecked??

#if !NETSTANDARD
                try
                {
#endif
                    fieldsReaderOrig = si.Info.Codec.StoredFieldsFormat.FieldsReader(cfsDir, si.Info, fieldInfos, context);
#if !NETSTANDARD
                }
#pragma warning disable 168
                catch (System.AccessViolationException ave)
#pragma warning restore 168
                {
                }
#endif

                if (fieldInfos.HasVectors) // open term vector files only as needed
                {
                    termVectorsReaderOrig = si.Info.Codec.TermVectorsFormat.VectorsReader(cfsDir, si.Info, fieldInfos, context);
                }
                else
                {
                    termVectorsReaderOrig = null;
                }

                success = true;
            }
            finally
            {
                if (!success)
                {
                    DecRef();
                }
            }
        }
Example #35
 private void  AssertDocDeleted(SegmentReader reader, SegmentReader reader2, int doc)
 {
     Assert.AreEqual(reader.IsDeleted(doc), reader2.IsDeleted(doc));
 }
Example #36
 internal MySegmentTermDocs(SegmentReader p, IState state) : base(p, state)
 {
 }
Example #37
 private void  InitBlock(SegmentReader enclosingInstance)
 {
     this.enclosingInstance = enclosingInstance;
 }
Example #38
        /// <summary>Merges all segments together into a single segment, optimizing an index
        /// for search.
        /// </summary>
        public virtual void  Optimize()
        {
            lock (this)
            {
                FlushRamSegments();
                while (segmentInfos.Count > 1 || (segmentInfos.Count == 1 && (SegmentReader.HasDeletions(segmentInfos.Info(0)) || segmentInfos.Info(0).dir != directory || (useCompoundFile && (!SegmentReader.UsesCompoundFile(segmentInfos.Info(0)) || SegmentReader.HasSeparateNorms(segmentInfos.Info(0)))))))
                {
                    int minSegment = segmentInfos.Count - mergeFactor;
                    MergeSegments(minSegment < 0 ? 0 : minSegment);
                }

                segmentInfos.Optimize(directory);
            }
        }
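A hedged usage sketch for the legacy optimize path above, assuming the pre-4.0 API (Optimize() later became ForceMerge(1)):

        // Collapse all segments into a single segment; expensive on large indexes.
        IndexWriter writer = new IndexWriter(directory, new StandardAnalyzer(), false);
        writer.Optimize();
        writer.Close();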
        // Delete by Term
        private long ApplyTermDeletes(IEnumerable <Term> termsIter, ReadersAndUpdates rld, SegmentReader reader)
        {
            lock (this)
            {
                long   delCount = 0;
                Fields fields   = reader.Fields;
                if (fields == null)
                {
                    // this reader has no postings
                    return(0);
                }

                TermsEnum termsEnum = null;

                string   currentField = null;
                DocsEnum docs         = null;

                Debug.Assert(CheckDeleteTerm(null));

                bool any = false;

                //System.out.println(Thread.currentThread().getName() + " del terms reader=" + reader);
                foreach (Term term in termsIter)
                {
                    // Since we visit terms sorted, we gain performance
                    // by re-using the same TermsEnum and seeking only
                    // forwards
                    if (!string.Equals(term.Field, currentField, StringComparison.Ordinal))
                    {
                        Debug.Assert(currentField == null || currentField.CompareToOrdinal(term.Field) < 0);
                        currentField = term.Field;
                        Terms terms = fields.GetTerms(currentField);
                        if (terms != null)
                        {
                            termsEnum = terms.GetIterator(termsEnum);
                        }
                        else
                        {
                            termsEnum = null;
                        }
                    }

                    if (termsEnum == null)
                    {
                        continue;
                    }
                    Debug.Assert(CheckDeleteTerm(term));

                    // System.out.println("  term=" + term);

                    if (termsEnum.SeekExact(term.Bytes))
                    {
                        // we don't need term frequencies for this
                        DocsEnum docsEnum = termsEnum.Docs(rld.LiveDocs, docs, DocsFlags.NONE);
                        //System.out.println("BDS: got docsEnum=" + docsEnum);

                        if (docsEnum != null)
                        {
                            while (true)
                            {
                                int docID = docsEnum.NextDoc();
                                //System.out.println(Thread.currentThread().getName() + " del term=" + term + " doc=" + docID);
                                if (docID == DocIdSetIterator.NO_MORE_DOCS)
                                {
                                    break;
                                }
                                if (!any)
                                {
                                    rld.InitWritableLiveDocs();
                                    any = true;
                                }
                                // NOTE: there is no limit check on the docID
                                // when deleting by Term (unlike by Query)
                                // because on flush we apply all Term deletes to
                                // each segment.  So all Term deleting here is
                                // against prior segments:
                                if (rld.Delete(docID))
                                {
                                    delCount++;
                                }
                            }
                        }
                    }
                }

                return(delCount);
            }
        }
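The public counterpart of this internal helper is IndexWriter's delete-by-term API; a hedged sketch with an illustrative term:

        // Deletes are buffered and applied per segment on flush, which is when
        // ApplyTermDeletes above walks the postings and clears live-doc bits.
        writer.DeleteDocuments(new Term("id", "42"));
        writer.Commit();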
Example #40
 public Norm(SegmentReader enclosingInstance, IndexInput in_Renamed, int number)
 {
     InitBlock(enclosingInstance);
     this.in_Renamed = in_Renamed;
     this.number     = number;
 }
Example #41
 private void  AssertDelDocsRefCountEquals(int refCount, SegmentReader reader)
 {
     Assert.AreEqual(refCount, reader.deletedDocsRef_ForNUnit.RefCount());
 }
Example #42
        public virtual void TestFieldNumberGaps()
        {
            int numIters = AtLeast(13);

            for (int i = 0; i < numIters; i++)
            {
                Directory dir = NewDirectory();
                {
                    IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NoMergePolicy.NO_COMPOUND_FILES));
                    Document    d      = new Document();
                    d.Add(new TextField("f1", "d1 first field", Field.Store.YES));
                    d.Add(new TextField("f2", "d1 second field", Field.Store.YES));
                    writer.AddDocument(d);
                    writer.Dispose();
                    SegmentInfos sis = new SegmentInfos();
                    sis.Read(dir);
                    Assert.AreEqual(1, sis.Size());
                    FieldInfos fis1 = SegmentReader.ReadFieldInfos(sis.Info(0));
                    Assert.AreEqual("f1", fis1.FieldInfo(0).Name);
                    Assert.AreEqual("f2", fis1.FieldInfo(1).Name);
                }

                {
                    IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(Random().NextBoolean() ? NoMergePolicy.NO_COMPOUND_FILES : NoMergePolicy.COMPOUND_FILES));
                    Document    d      = new Document();
                    d.Add(new TextField("f1", "d2 first field", Field.Store.YES));
                    d.Add(new StoredField("f3", new byte[] { 1, 2, 3 }));
                    writer.AddDocument(d);
                    writer.Dispose();
                    SegmentInfos sis = new SegmentInfos();
                    sis.Read(dir);
                    Assert.AreEqual(2, sis.Size());
                    FieldInfos fis1 = SegmentReader.ReadFieldInfos(sis.Info(0));
                    FieldInfos fis2 = SegmentReader.ReadFieldInfos(sis.Info(1));
                    Assert.AreEqual("f1", fis1.FieldInfo(0).Name);
                    Assert.AreEqual("f2", fis1.FieldInfo(1).Name);
                    Assert.AreEqual("f1", fis2.FieldInfo(0).Name);
                    Assert.IsNull(fis2.FieldInfo(1));
                    Assert.AreEqual("f3", fis2.FieldInfo(2).Name);
                }

                {
                    IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(Random().NextBoolean() ? NoMergePolicy.NO_COMPOUND_FILES : NoMergePolicy.COMPOUND_FILES));
                    Document    d      = new Document();
                    d.Add(new TextField("f1", "d3 first field", Field.Store.YES));
                    d.Add(new TextField("f2", "d3 second field", Field.Store.YES));
                    d.Add(new StoredField("f3", new byte[] { 1, 2, 3, 4, 5 }));
                    writer.AddDocument(d);
                    writer.Dispose();
                    SegmentInfos sis = new SegmentInfos();
                    sis.Read(dir);
                    Assert.AreEqual(3, sis.Size());
                    FieldInfos fis1 = SegmentReader.ReadFieldInfos(sis.Info(0));
                    FieldInfos fis2 = SegmentReader.ReadFieldInfos(sis.Info(1));
                    FieldInfos fis3 = SegmentReader.ReadFieldInfos(sis.Info(2));
                    Assert.AreEqual("f1", fis1.FieldInfo(0).Name);
                    Assert.AreEqual("f2", fis1.FieldInfo(1).Name);
                    Assert.AreEqual("f1", fis2.FieldInfo(0).Name);
                    Assert.IsNull(fis2.FieldInfo(1));
                    Assert.AreEqual("f3", fis2.FieldInfo(2).Name);
                    Assert.AreEqual("f1", fis3.FieldInfo(0).Name);
                    Assert.AreEqual("f2", fis3.FieldInfo(1).Name);
                    Assert.AreEqual("f3", fis3.FieldInfo(2).Name);
                }

                {
                    IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(Random().NextBoolean() ? NoMergePolicy.NO_COMPOUND_FILES : NoMergePolicy.COMPOUND_FILES));
                    writer.DeleteDocuments(new Term("f1", "d1"));
                    // nuke the first segment entirely so that the segment with gaps is
                    // loaded first!
                    writer.ForceMergeDeletes();
                    writer.Dispose();
                }

                IndexWriter writer_ = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(new LogByteSizeMergePolicy()).SetInfoStream(new FailOnNonBulkMergesInfoStream()));
                writer_.ForceMerge(1);
                writer_.Dispose();

                SegmentInfos sis_ = new SegmentInfos();
                sis_.Read(dir);
                Assert.AreEqual(1, sis_.Size());
                FieldInfos fis1_ = SegmentReader.ReadFieldInfos(sis_.Info(0));
                Assert.AreEqual("f1", fis1_.FieldInfo(0).Name);
                Assert.AreEqual("f2", fis1_.FieldInfo(1).Name);
                Assert.AreEqual("f3", fis1_.FieldInfo(2).Name);
                dir.Dispose();
            }
        }
Example #43
        public override void SetUp()
        {
            base.SetUp();

            /*
             * for (int i = 0; i < testFields.Length; i++) {
             * fieldInfos.Add(testFields[i], true, true, testFieldsStorePos[i], testFieldsStoreOff[i]);
             * }
             */

            Array.Sort(TestTerms);
            int tokenUpto = 0;

            for (int i = 0; i < TestTerms.Length; i++)
            {
                Positions[i] = new int[TERM_FREQ];
                // first position must be 0
                for (int j = 0; j < TERM_FREQ; j++)
                {
                    // positions are always sorted in increasing order
                    Positions[i][j] = (int)(j * 10 + new Random(1).NextDouble() * 10);
                    TestToken token = Tokens[tokenUpto++] = new TestToken(this);
                    token.Text        = TestTerms[i];
                    token.Pos         = Positions[i][j];
                    token.StartOffset = j * 10;
                    token.EndOffset   = j * 10 + TestTerms[i].Length;
                }
            }
            Array.Sort(Tokens);

            Dir = NewDirectory();
            IndexWriter writer = new IndexWriter(Dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MyAnalyzer(this)).SetMaxBufferedDocs(-1).SetMergePolicy(NewLogMergePolicy(false, 10)).SetUseCompoundFile(false));

            Document doc = new Document();

            for (int i = 0; i < TestFields.Length; i++)
            {
                FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
                if (TestFieldsStorePos[i] && TestFieldsStoreOff[i])
                {
                    customType.StoreTermVectors         = true;
                    customType.StoreTermVectorPositions = true;
                    customType.StoreTermVectorOffsets   = true;
                }
                else if (TestFieldsStorePos[i] && !TestFieldsStoreOff[i])
                {
                    customType.StoreTermVectors         = true;
                    customType.StoreTermVectorPositions = true;
                }
                else if (!TestFieldsStorePos[i] && TestFieldsStoreOff[i])
                {
                    customType.StoreTermVectors       = true;
                    customType.StoreTermVectorOffsets = true;
                }
                else
                {
                    customType.StoreTermVectors = true;
                }
                doc.Add(new Field(TestFields[i], "", customType));
            }

            //Create 5 documents for testing, they all have the same
            //terms
            for (int j = 0; j < 5; j++)
            {
                writer.AddDocument(doc);
            }
            writer.Commit();
            Seg = writer.NewestSegment();
            writer.Dispose();

            FieldInfos = SegmentReader.ReadFieldInfos(Seg);
        }
        // DocValues updates
        private void ApplyDocValuesUpdates <T1>(IEnumerable <T1> updates, ReadersAndUpdates rld, SegmentReader reader, DocValuesFieldUpdates.Container dvUpdatesContainer) where T1 : DocValuesUpdate
        {
            lock (this)
            {
                Fields fields = reader.Fields;
                if (fields == null)
                {
                    // this reader has no postings
                    return;
                }

                // TODO: we can process the updates per DV field, from last to first so that
                // if multiple terms affect same document for the same field, we add an update
                // only once (that of the last term). To do that, we can keep a bitset which
                // marks which documents have already been updated. So e.g. if term T1
                // updates doc 7, and then we process term T2 and it updates doc 7 as well,
                // we don't apply the update since we know T1 came last and therefore wins
                // the update.
                // We can also use that bitset as 'liveDocs' to pass to TermsEnum.Docs(), so
                // that these documents aren't even returned.

                string    currentField = null;
                TermsEnum termsEnum    = null;
                DocsEnum  docs         = null;

                //System.out.println(Thread.currentThread().getName() + " numericDVUpdate reader=" + reader);
                foreach (DocValuesUpdate update in updates)
                {
                    Term term  = update.term;
                    int  limit = update.docIDUpto;

                    // TODO: we traverse the terms in update order (not term order) so that we
                    // apply the updates in the correct order, i.e. if two terms update the
                    // same document, the last one that came in wins, irrespective of the
                    // terms lexical order.
                    // we can apply the updates in terms order if we keep an updatesGen (and
                    // increment it with every update) and attach it to each NumericUpdate. Note
                    // that we cannot rely only on docIDUpto because an app may send two updates
                    // which will get same docIDUpto, yet will still need to respect the order
                    // those updates arrived.

                    if (!string.Equals(term.Field, currentField, StringComparison.Ordinal))
                    {
                        // if we change the code to process updates in terms order, enable this assert
                        //        assert currentField == null || currentField.CompareToOrdinal(term.Field) < 0;
                        currentField = term.Field;
                        Terms terms = fields.GetTerms(currentField);
                        if (terms != null)
                        {
                            termsEnum = terms.GetIterator(termsEnum);
                        }
                        else
                        {
                            termsEnum = null;
                            continue; // no terms in that field
                        }
                    }

                    if (termsEnum == null)
                    {
                        continue;
                    }
                    // System.out.println("  term=" + term);

                    if (termsEnum.SeekExact(term.Bytes))
                    {
                        // we don't need term frequencies for this
                        DocsEnum docsEnum = termsEnum.Docs(rld.LiveDocs, docs, DocsFlags.NONE);

                        //System.out.println("BDS: got docsEnum=" + docsEnum);

                        DocValuesFieldUpdates dvUpdates = dvUpdatesContainer.GetUpdates(update.field, update.type);
                        if (dvUpdates == null)
                        {
                            dvUpdates = dvUpdatesContainer.NewUpdates(update.field, update.type, reader.MaxDoc);
                        }
                        int doc;
                        while ((doc = docsEnum.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
                        {
                            //System.out.println(Thread.currentThread().getName() + " numericDVUpdate term=" + term + " doc=" + docID);
                            if (doc >= limit)
                            {
                                break; // no more docs that can be updated for this term
                            }
                            dvUpdates.Add(doc, update.value);
                        }
                    }
                }
            }
        }
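        // Context sketch, not part of the method above: the DocValuesUpdate entries it
        // consumes are queued by IndexWriter calls like the following (the "id" and
        // "price" field names are made up; "price" must already exist in the index as
        // a NumericDocValuesField).
        static void UpdatePrice(IndexWriter writer)
        {
            // rewrites the "price" NumericDocValues of every doc whose id term matches
            writer.UpdateNumericDocValue(new Term("id", "doc-7"), "price", 42L);
            writer.Commit();
        }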
Example #45
        public virtual void TestSegmentReaderDelDocsReferenceCounting()
        {
            Directory dir1 = new MockRAMDirectory();

            TestIndexReaderReopen.CreateIndex(dir1, false);

            IndexReader   origReader        = IndexReader.Open(dir1, false);
            SegmentReader origSegmentReader = SegmentReader.GetOnlySegmentReader(origReader);

            // deletedDocsRef should be null because nothing has updated yet
            Assert.IsNull(origSegmentReader.deletedDocsRef_ForNUnit);

            // we deleted a document, so there is now a deletedDocs bitvector and a
            // reference to it
            origReader.DeleteDocument(1);
            AssertDelDocsRefCountEquals(1, origSegmentReader);

            // the cloned segmentreader should have 2 references, 1 to itself, and 1 to
            // the original segmentreader
            IndexReader   clonedReader        = (IndexReader)origReader.Clone();
            SegmentReader clonedSegmentReader = SegmentReader.GetOnlySegmentReader(clonedReader);

            AssertDelDocsRefCountEquals(2, origSegmentReader);
            // deleting a document creates a new deletedDocs bitvector, the refs goes to
            // 1
            clonedReader.DeleteDocument(2);
            AssertDelDocsRefCountEquals(1, origSegmentReader);
            AssertDelDocsRefCountEquals(1, clonedSegmentReader);

            // make sure the deletedocs objects are different (copy
            // on write)
            Assert.IsTrue(origSegmentReader.deletedDocs_ForNUnit != clonedSegmentReader.deletedDocs_ForNUnit);

            AssertDocDeleted(origSegmentReader, clonedSegmentReader, 1);
            Assert.IsTrue(!origSegmentReader.IsDeleted(2));  // doc 2 should not be deleted
            // in original segmentreader
            Assert.IsTrue(clonedSegmentReader.IsDeleted(2)); // doc 2 should be deleted in
            // cloned segmentreader

            // deleting a doc from the original segmentreader should throw an exception
            Assert.Throws<LockObtainFailedException>(() => origReader.DeleteDocument(4), "expected exception");

            origReader.Close();
            // try closing the original segment reader to see if it affects the
            // clonedSegmentReader
            clonedReader.DeleteDocument(3);
            clonedReader.Flush();
            AssertDelDocsRefCountEquals(1, clonedSegmentReader);

            // test a reopened reader
            IndexReader   reopenedReader      = clonedReader.Reopen();
            IndexReader   cloneReader2        = (IndexReader)reopenedReader.Clone();
            SegmentReader cloneSegmentReader2 = SegmentReader.GetOnlySegmentReader(cloneReader2);

            AssertDelDocsRefCountEquals(2, cloneSegmentReader2);
            clonedReader.Close();
            reopenedReader.Close();
            cloneReader2.Close();

            dir1.Close();
        }
Example #46
			private void InitBlock(SegmentReader enclosingInstance)
			{
				this.enclosingInstance = enclosingInstance;
			}
        internal SegmentCoreReaders(SegmentReader owner, Directory dir, SegmentCommitInfo si, IOContext context, int termsIndexDivisor)
        {
            fieldsReaderLocal = new AnonymousFieldsReaderLocal(this);
            termVectorsLocal  = new AnonymousTermVectorsLocal(this);

            if (termsIndexDivisor == 0)
            {
                throw new System.ArgumentException("indexDivisor must be < 0 (don't load terms index) or greater than 0 (got 0)");
            }

            Codec     codec = si.Info.Codec;
            Directory cfsDir; // confusing name: if (cfs) it's the cfsdir, otherwise it's the segment's directory.

            bool success = false;

            try
            {
                if (si.Info.UseCompoundFile)
                {
                    cfsDir = CfsReader = new CompoundFileDirectory(dir, IndexFileNames.SegmentFileName(si.Info.Name, "", IndexFileNames.COMPOUND_FILE_EXTENSION), context, false);
                }
                else
                {
                    CfsReader = null;
                    cfsDir    = dir;
                }

                FieldInfos fieldInfos = owner.FieldInfos_Renamed;

                this.TermsIndexDivisor = termsIndexDivisor;
                PostingsFormat   format           = codec.PostingsFormat();
                SegmentReadState segmentReadState = new SegmentReadState(cfsDir, si.Info, fieldInfos, context, termsIndexDivisor);
                // Ask codec for its Fields
                Fields = format.FieldsProducer(segmentReadState);
                Debug.Assert(Fields != null);
                // ask codec for its Norms:
                // TODO: since we don't write any norms file if there are no norms,
                // kinda janky to assume the codec handles the case of no norms file at all gracefully?!

                if (fieldInfos.HasNorms())
                {
                    NormsProducer = codec.NormsFormat().NormsProducer(segmentReadState);
                    Debug.Assert(NormsProducer != null);
                }
                else
                {
                    NormsProducer = null;
                }

                StoredFieldsFormat sff = si.Info.Codec.StoredFieldsFormat();

                try
                {
                    FieldsReaderOrig = sff.FieldsReader(cfsDir, si.Info, fieldInfos, context);
                }
                catch (System.AccessViolationException)
                {
                    // deliberately swallowed: if the stored-fields reader cannot be
                    // opened here, FieldsReaderOrig stays null instead of failing the core
                }

                //FieldsReaderOrig = si.Info.Codec.StoredFieldsFormat().FieldsReader(cfsDir, si.Info, fieldInfos, context);

                if (fieldInfos.HasVectors()) // open term vector files only as needed
                {
                    TermVectorsReaderOrig = si.Info.Codec.TermVectorsFormat().VectorsReader(cfsDir, si.Info, fieldInfos, context);
                }
                else
                {
                    TermVectorsReaderOrig = null;
                }

                success = true;
            }
            finally
            {
                if (!success)
                {
                    DecRef();
                }
            }
        }
Example #48
        /// <summary> Test the term index.</summary>
        private Status.TermIndexStatus TestTermIndex(SegmentInfo info, SegmentReader reader, IState state)
        {
            var status = new Status.TermIndexStatus();

            try
            {
                if (infoStream != null)
                {
                    infoStream.Write("    test: terms, freq, prox...");
                }

                TermEnum      termEnum      = reader.Terms(state);
                TermPositions termPositions = reader.TermPositions(state);

                // Used only to count up # deleted docs for this term
                var myTermDocs = new MySegmentTermDocs(reader, state);

                int maxDoc = reader.MaxDoc;

                while (termEnum.Next(state))
                {
                    status.termCount++;
                    Term term    = termEnum.Term;
                    int  docFreq = termEnum.DocFreq();
                    termPositions.Seek(term, state);
                    int lastDoc = -1;
                    int freq0   = 0;
                    status.totFreq += docFreq;
                    while (termPositions.Next(state))
                    {
                        freq0++;
                        int doc  = termPositions.Doc;
                        int freq = termPositions.Freq;
                        if (doc <= lastDoc)
                        {
                            throw new System.SystemException("term " + term + ": doc " + doc + " <= lastDoc " + lastDoc);
                        }
                        if (doc >= maxDoc)
                        {
                            throw new System.SystemException("term " + term + ": doc " + doc + " >= maxDoc " + maxDoc);
                        }

                        lastDoc = doc;
                        if (freq <= 0)
                        {
                            throw new System.SystemException("term " + term + ": doc " + doc + ": freq " + freq + " is out of bounds");
                        }

                        int lastPos = -1;
                        status.totPos += freq;
                        for (int j = 0; j < freq; j++)
                        {
                            int pos = termPositions.NextPosition(state);
                            if (pos < -1)
                            {
                                throw new System.SystemException("term " + term + ": doc " + doc + ": pos " + pos + " is out of bounds");
                            }
                            if (pos < lastPos)
                            {
                                throw new System.SystemException("term " + term + ": doc " + doc + ": pos " + pos + " < lastPos " + lastPos);
                            }
                            lastPos = pos;
                        }
                    }

                    // Now count how many deleted docs occurred in
                    // this term:
                    int delCount;
                    if (reader.HasDeletions)
                    {
                        myTermDocs.Seek(term, state);
                        while (myTermDocs.Next(state))
                        {
                        }
                        delCount = myTermDocs.delCount;
                    }
                    else
                    {
                        delCount = 0;
                    }

                    if (freq0 + delCount != docFreq)
                    {
                        throw new System.SystemException("term " + term + " docFreq=" + docFreq + " != num docs seen " + freq0 + " + num docs deleted " + delCount);
                    }
                }

                Msg("OK [" + status.termCount + " terms; " + status.totFreq + " terms/docs pairs; " + status.totPos + " tokens]");
            }
            catch (System.Exception e)
            {
                Msg("ERROR [" + System.Convert.ToString(e.Message) + "]");
                status.error = e;
                if (infoStream != null)
                {
                    infoStream.WriteLine(e.StackTrace);
                }
            }

            return(status);
        }
Example #49
        public virtual void TestMerge()
        {
            Codec       codec = Codec.Default;
            SegmentInfo si    = new SegmentInfo(MergedDir, Constants.LUCENE_MAIN_VERSION, MergedSegment, -1, false, codec, null);

            SegmentMerger merger = new SegmentMerger(new List<AtomicReader> { Reader1, Reader2 }, si, InfoStream.Default, MergedDir, IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL, CheckAbort.NONE, new FieldInfos.FieldNumbers(), NewIOContext(Random), true);
            MergeState mergeState = merger.Merge();
            int        docsMerged = mergeState.SegmentInfo.DocCount;

            Assert.IsTrue(docsMerged == 2);
            //Should be able to open a new SegmentReader against the new directory
            SegmentReader mergedReader = new SegmentReader(new SegmentCommitInfo(new SegmentInfo(MergedDir, Constants.LUCENE_MAIN_VERSION, MergedSegment, docsMerged, false, codec, null), 0, -1L, -1L), DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR, NewIOContext(Random));

            Assert.IsTrue(mergedReader != null);
            Assert.IsTrue(mergedReader.NumDocs == 2);
            Document newDoc1 = mergedReader.Document(0);

            Assert.IsTrue(newDoc1 != null);
            //There are 2 unstored fields on the document
            Assert.IsTrue(DocHelper.NumFields(newDoc1) == DocHelper.NumFields(Doc1) - DocHelper.Unstored.Count);
            Document newDoc2 = mergedReader.Document(1);

            Assert.IsTrue(newDoc2 != null);
            Assert.IsTrue(DocHelper.NumFields(newDoc2) == DocHelper.NumFields(Doc2) - DocHelper.Unstored.Count);

            DocsEnum termDocs = TestUtil.Docs(Random, mergedReader, DocHelper.TEXT_FIELD_2_KEY, new BytesRef("field"), MultiFields.GetLiveDocs(mergedReader), null, 0);

            Assert.IsTrue(termDocs != null);
            Assert.IsTrue(termDocs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);

            int tvCount = 0;

            foreach (FieldInfo fieldInfo in mergedReader.FieldInfos)
            {
                if (fieldInfo.HasVectors)
                {
                    tvCount++;
                }
            }

            //System.out.println("stored size: " + stored.Size());
            Assert.AreEqual(3, tvCount, "We do not have 3 fields that were indexed with term vectors");

            Terms vector = mergedReader.GetTermVectors(0).GetTerms(DocHelper.TEXT_FIELD_2_KEY);

            Assert.IsNotNull(vector);
            Assert.AreEqual(3, vector.Count);
            TermsEnum termsEnum = vector.GetIterator(null);

            int i = 0;

            while (termsEnum.Next() != null)
            {
                string term = termsEnum.Term.Utf8ToString();
                int    freq = (int)termsEnum.TotalTermFreq;
                //System.out.println("Term: " + term + " Freq: " + freq);
                Assert.IsTrue(DocHelper.FIELD_2_TEXT.IndexOf(term, StringComparison.Ordinal) != -1);
                Assert.IsTrue(DocHelper.FIELD_2_FREQS[i] == freq);
                i++;
            }

            TestSegmentReader.CheckNorms(mergedReader);
            mergedReader.Dispose();
        }
Example #50
        /// <summary>Returns a <see cref="Status" /> instance detailing
        /// the state of the index.
        ///
        /// <p/>As this method checks every byte in the specified
        /// segments, on a large index it can take quite a long
        /// time to run.
        ///
        /// <p/><b>WARNING</b>: make sure
        /// you only call this when the index is not opened by any
        /// writer.
        /// </summary>
        /// <param name="onlySegments">list of specific segment names to check</param>
        public virtual Status CheckIndex_Renamed_Method(List<string> onlySegments, IState state)
        {
            System.Globalization.NumberFormatInfo nf = System.Globalization.CultureInfo.CurrentCulture.NumberFormat;
            SegmentInfos sis    = new SegmentInfos();
            Status       result = new Status();

            result.dir = dir;
            try
            {
                sis.Read(dir, state);
            }
            catch (System.Exception t)
            {
                Msg("ERROR: could not read any segments file in directory");
                result.missingSegments = true;
                if (infoStream != null)
                {
                    infoStream.WriteLine(t.StackTrace);
                }
                return(result);
            }

            int        numSegments      = sis.Count;
            var        segmentsFileName = sis.GetCurrentSegmentFileName();
            IndexInput input            = null;

            try
            {
                input = dir.OpenInput(segmentsFileName, state);
            }
            catch (System.Exception t)
            {
                Msg("ERROR: could not open segments file in directory");
                if (infoStream != null)
                {
                    infoStream.WriteLine(t.StackTrace);
                }
                result.cantOpenSegments = true;
                return(result);
            }
            int format = 0;

            try
            {
                format = input.ReadInt(state);
            }
            catch (System.Exception t)
            {
                Msg("ERROR: could not read segment file version in directory");
                if (infoStream != null)
                {
                    infoStream.WriteLine(t.StackTrace);
                }
                result.missingSegmentVersion = true;
                return(result);
            }
            finally
            {
                if (input != null)
                {
                    input.Close();
                }
            }

            System.String sFormat = "";
            bool          skip    = false;

            if (format == SegmentInfos.FORMAT)
            {
                sFormat = "FORMAT [Lucene Pre-2.1]";
            }
            else if (format == SegmentInfos.FORMAT_LOCKLESS)
            {
                sFormat = "FORMAT_LOCKLESS [Lucene 2.1]";
            }
            else if (format == SegmentInfos.FORMAT_SINGLE_NORM_FILE)
            {
                sFormat = "FORMAT_SINGLE_NORM_FILE [Lucene 2.2]";
            }
            else if (format == SegmentInfos.FORMAT_SHARED_DOC_STORE)
            {
                sFormat = "FORMAT_SHARED_DOC_STORE [Lucene 2.3]";
            }
            else
            {
                if (format == SegmentInfos.FORMAT_CHECKSUM)
                {
                    sFormat = "FORMAT_CHECKSUM [Lucene 2.4]";
                }
                else if (format == SegmentInfos.FORMAT_DEL_COUNT)
                {
                    sFormat = "FORMAT_DEL_COUNT [Lucene 2.4]";
                }
                else if (format == SegmentInfos.FORMAT_HAS_PROX)
                {
                    sFormat = "FORMAT_HAS_PROX [Lucene 2.4]";
                }
                else if (format == SegmentInfos.FORMAT_USER_DATA)
                {
                    sFormat = "FORMAT_USER_DATA [Lucene 2.9]";
                }
                else if (format == SegmentInfos.FORMAT_DIAGNOSTICS)
                {
                    sFormat = "FORMAT_DIAGNOSTICS [Lucene 2.9]";
                }
                else if (format < SegmentInfos.CURRENT_FORMAT)
                {
                    sFormat = "int=" + format + " [newer version of Lucene than this tool]";
                    skip    = true;
                }
                else
                {
                    sFormat = format + " [Lucene 1.3 or prior]";
                }
            }

            result.segmentsFileName = segmentsFileName;
            result.numSegments      = numSegments;
            result.segmentFormat    = sFormat;
            result.userData         = sis.UserData;
            System.String userDataString;
            if (sis.UserData.Count > 0)
            {
                userDataString = " userData=" + CollectionsHelper.CollectionToString(sis.UserData);
            }
            else
            {
                userDataString = "";
            }

            Msg("Segments file=" + segmentsFileName + " numSegments=" + numSegments + " version=" + sFormat + userDataString);

            if (onlySegments != null)
            {
                result.partial = true;
                if (infoStream != null)
                {
                    infoStream.Write("\nChecking only these segments:");
                }
                foreach (string s in onlySegments)
                {
                    if (infoStream != null)
                    {
                        infoStream.Write(" " + s);
                    }
                }
                result.segmentsChecked.AddRange(onlySegments);
                Msg(":");
            }

            if (skip)
            {
                Msg("\nERROR: this index appears to be created by a newer version of Lucene than this tool was compiled on; please re-compile this tool on the matching version of Lucene; exiting");
                result.toolOutOfDate = true;
                return(result);
            }


            result.newSegments = (SegmentInfos)sis.Clone();
            result.newSegments.Clear();

            for (int i = 0; i < numSegments; i++)
            {
                SegmentInfo info = sis.Info(i);
                if (onlySegments != null && !onlySegments.Contains(info.name))
                {
                    continue;
                }
                var segInfoStat = new Status.SegmentInfoStatus();
                result.segmentInfos.Add(segInfoStat);
                Msg("  " + (1 + i) + " of " + numSegments + ": name=" + info.name + " docCount=" + info.docCount);
                segInfoStat.name     = info.name;
                segInfoStat.docCount = info.docCount;

                int toLoseDocCount = info.docCount;

                SegmentReader reader = null;

                try
                {
                    Msg("    compound=" + info.GetUseCompoundFile(state));
                    segInfoStat.compound = info.GetUseCompoundFile(state);
                    Msg("    hasProx=" + info.HasProx);
                    segInfoStat.hasProx = info.HasProx;
                    Msg("    numFiles=" + info.Files(state).Count);
                    segInfoStat.numFiles = info.Files(state).Count;
                    Msg(System.String.Format(nf, "    size (MB)={0:f}", new System.Object[] { (info.SizeInBytes(state) / (1024.0 * 1024.0)) }));
                    segInfoStat.sizeMB = info.SizeInBytes(state) / (1024.0 * 1024.0);
                    IDictionary<string, string> diagnostics = info.Diagnostics;
                    segInfoStat.diagnostics = diagnostics;
                    if (diagnostics.Count > 0)
                    {
                        Msg("    diagnostics = " + CollectionsHelper.CollectionToString(diagnostics));
                    }

                    int docStoreOffset = info.DocStoreOffset;
                    if (docStoreOffset != -1)
                    {
                        Msg("    docStoreOffset=" + docStoreOffset);
                        segInfoStat.docStoreOffset = docStoreOffset;
                        Msg("    docStoreSegment=" + info.DocStoreSegment);
                        segInfoStat.docStoreSegment = info.DocStoreSegment;
                        Msg("    docStoreIsCompoundFile=" + info.DocStoreIsCompoundFile);
                        segInfoStat.docStoreCompoundFile = info.DocStoreIsCompoundFile;
                    }
                    System.String delFileName = info.GetDelFileName();
                    if (delFileName == null)
                    {
                        Msg("    no deletions");
                        segInfoStat.hasDeletions = false;
                    }
                    else
                    {
                        Msg("    has deletions [delFileName=" + delFileName + "]");
                        segInfoStat.hasDeletions      = true;
                        segInfoStat.deletionsFileName = delFileName;
                    }
                    if (infoStream != null)
                    {
                        infoStream.Write("    test: open reader.........");
                    }
                    reader = SegmentReader.Get(true, info, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR, state);

                    segInfoStat.openReaderPassed = true;

                    int numDocs = reader.NumDocs();
                    toLoseDocCount = numDocs;
                    if (reader.HasDeletions)
                    {
                        if (reader.deletedDocs.Count() != info.GetDelCount(state))
                        {
                            throw new System.SystemException("delete count mismatch: info=" + info.GetDelCount(state) + " vs deletedDocs.count()=" + reader.deletedDocs.Count());
                        }
                        if (reader.deletedDocs.Count() > reader.MaxDoc)
                        {
                            throw new System.SystemException("too many deleted docs: MaxDoc=" + reader.MaxDoc + " vs deletedDocs.count()=" + reader.deletedDocs.Count());
                        }
                        if (info.docCount - numDocs != info.GetDelCount(state))
                        {
                            throw new System.SystemException("delete count mismatch: info=" + info.GetDelCount(state) + " vs reader=" + (info.docCount - numDocs));
                        }
                        segInfoStat.numDeleted = info.docCount - numDocs;
                        Msg("OK [" + (segInfoStat.numDeleted) + " deleted docs]");
                    }
                    else
                    {
                        if (info.GetDelCount(state) != 0)
                        {
                            throw new System.SystemException("delete count mismatch: info=" + info.GetDelCount(state) + " vs reader=" + (info.docCount - numDocs));
                        }
                        Msg("OK");
                    }
                    if (reader.MaxDoc != info.docCount)
                    {
                        throw new System.SystemException("SegmentReader.MaxDoc " + reader.MaxDoc + " != SegmentInfos.docCount " + info.docCount);
                    }

                    // Test getFieldNames()
                    if (infoStream != null)
                    {
                        infoStream.Write("    test: fields..............");
                    }
                    ICollection<string> fieldNames = reader.GetFieldNames(IndexReader.FieldOption.ALL);
                    Msg("OK [" + fieldNames.Count + " fields]");
                    segInfoStat.numFields = fieldNames.Count;

                    // Test Field Norms
                    segInfoStat.fieldNormStatus = TestFieldNorms(fieldNames, reader, state);

                    // Test the Term Index
                    segInfoStat.termIndexStatus = TestTermIndex(info, reader, state);

                    // Test Stored Fields
                    segInfoStat.storedFieldStatus = TestStoredFields(info, reader, nf, state);

                    // Test Term Vectors
                    segInfoStat.termVectorStatus = TestTermVectors(info, reader, nf, state);

                    // Rethrow the first exception we encountered
                    //  This will cause stats for failed segments to be incremented properly
                    if (segInfoStat.fieldNormStatus.error != null)
                    {
                        throw new SystemException("Field Norm test failed");
                    }
                    else if (segInfoStat.termIndexStatus.error != null)
                    {
                        throw new SystemException("Term Index test failed");
                    }
                    else if (segInfoStat.storedFieldStatus.error != null)
                    {
                        throw new SystemException("Stored Field test failed");
                    }
                    else if (segInfoStat.termVectorStatus.error != null)
                    {
                        throw new System.SystemException("Term Vector test failed");
                    }

                    Msg("");
                }
                catch (System.Exception t)
                {
                    Msg("FAILED");
                    const string comment = "fixIndex() would remove reference to this segment";
                    Msg("    WARNING: " + comment + "; full exception:");
                    if (infoStream != null)
                    {
                        infoStream.WriteLine(t.StackTrace);
                    }
                    Msg("");
                    result.totLoseDocCount += toLoseDocCount;
                    result.numBadSegments++;
                    continue;
                }
                finally
                {
                    if (reader != null)
                    {
                        reader.Close();
                    }
                }

                // Keeper
                result.newSegments.Add((SegmentInfo)info.Clone());
            }

            if (0 == result.numBadSegments)
            {
                result.clean = true;
                Msg("No problems were detected with this index.\n");
            }
            else
            {
                Msg("WARNING: " + result.numBadSegments + " broken segments (containing " + result.totLoseDocCount + " documents) detected");
            }

            return(result);
        }
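        // A typical driver for the checker above, sketched against this IState-threaded
        // fork; the FixIndex signature is assumed to mirror CheckIndex_Renamed_Method.
        static void VerifyIndex(Directory dir, IState state)
        {
            var checker = new CheckIndex(dir);
            CheckIndex.Status status = checker.CheckIndex_Renamed_Method(null, state); // null = check all segments
            if (!status.clean)
            {
                // drops the segments recorded as broken, losing their documents
                checker.FixIndex(status, state);
            }
        }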
Example #51
        public virtual void TestSameFieldNumbersAcrossSegments()
        {
            for (int i = 0; i < 2; i++)
            {
                Directory   dir    = NewDirectory();
                IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NoMergePolicy.COMPOUND_FILES));

                Document d1 = new Document();
                d1.Add(new StringField("f1", "first field", Field.Store.YES));
                d1.Add(new StringField("f2", "second field", Field.Store.YES));
                writer.AddDocument(d1);

                if (i == 1)
                {
                    writer.Dispose();
                    writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NoMergePolicy.COMPOUND_FILES));
                }
                else
                {
                    writer.Commit();
                }

                Document  d2          = new Document();
                FieldType customType2 = new FieldType(TextField.TYPE_STORED);
                customType2.StoreTermVectors = true;
                d2.Add(new TextField("f2", "second field", Field.Store.NO));
                d2.Add(new Field("f1", "first field", customType2));
                d2.Add(new TextField("f3", "third field", Field.Store.NO));
                d2.Add(new TextField("f4", "fourth field", Field.Store.NO));
                writer.AddDocument(d2);

                writer.Dispose();

                SegmentInfos sis = new SegmentInfos();
                sis.Read(dir);
                Assert.AreEqual(2, sis.Size());

                FieldInfos fis1 = SegmentReader.ReadFieldInfos(sis.Info(0));
                FieldInfos fis2 = SegmentReader.ReadFieldInfos(sis.Info(1));

                Assert.AreEqual("f1", fis1.FieldInfo(0).Name);
                Assert.AreEqual("f2", fis1.FieldInfo(1).Name);
                Assert.AreEqual("f1", fis2.FieldInfo(0).Name);
                Assert.AreEqual("f2", fis2.FieldInfo(1).Name);
                Assert.AreEqual("f3", fis2.FieldInfo(2).Name);
                Assert.AreEqual("f4", fis2.FieldInfo(3).Name);

                writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));
                writer.ForceMerge(1);
                writer.Dispose();

                sis = new SegmentInfos();
                sis.Read(dir);
                Assert.AreEqual(1, sis.Size());

                FieldInfos fis3 = SegmentReader.ReadFieldInfos(sis.Info(0));

                Assert.AreEqual("f1", fis3.FieldInfo(0).Name);
                Assert.AreEqual("f2", fis3.FieldInfo(1).Name);
                Assert.AreEqual("f3", fis3.FieldInfo(2).Name);
                Assert.AreEqual("f4", fis3.FieldInfo(3).Name);

                dir.Dispose();
            }
        }
Example #52
        public virtual void TestPositions()
        {
            Directory   ram      = NewDirectory();
            Analyzer    analyzer = new MockAnalyzer(Random);
            IndexWriter writer   = new IndexWriter(ram, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
            Document    d        = new Document();

            // f1,f2,f3: docs only
            FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);

            ft.IndexOptions = IndexOptions.DOCS_ONLY;

            Field f1 = NewField("f1", "this field has docs only", ft);

            d.Add(f1);

            Field f2 = NewField("f2", "this field has docs only", ft);

            d.Add(f2);

            Field f3 = NewField("f3", "this field has docs only", ft);

            d.Add(f3);

            FieldType ft2 = new FieldType(TextField.TYPE_NOT_STORED);

            ft2.IndexOptions = IndexOptions.DOCS_AND_FREQS;

            // f4,f5,f6 docs and freqs
            Field f4 = NewField("f4", "this field has docs and freqs", ft2);

            d.Add(f4);

            Field f5 = NewField("f5", "this field has docs and freqs", ft2);

            d.Add(f5);

            Field f6 = NewField("f6", "this field has docs and freqs", ft2);

            d.Add(f6);

            FieldType ft3 = new FieldType(TextField.TYPE_NOT_STORED);

            ft3.IndexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;

            // f7,f8,f9 docs/freqs/positions
            Field f7 = NewField("f7", "this field has docs and freqs and positions", ft3);

            d.Add(f7);

            Field f8 = NewField("f8", "this field has docs and freqs and positions", ft3);

            d.Add(f8);

            Field f9 = NewField("f9", "this field has docs and freqs and positions", ft3);

            d.Add(f9);

            writer.AddDocument(d);
            writer.ForceMerge(1);

            // now we add another document which has docs-only for f1, f4, f7, docs/freqs for f2, f5, f8,
            // and docs/freqs/positions for f3, f6, f9
            d = new Document();

            // f1,f4,f7: docs only
            f1 = NewField("f1", "this field has docs only", ft);
            d.Add(f1);

            f4 = NewField("f4", "this field has docs only", ft);
            d.Add(f4);

            f7 = NewField("f7", "this field has docs only", ft);
            d.Add(f7);

            // f2, f5, f8: docs and freqs
            f2 = NewField("f2", "this field has docs and freqs", ft2);
            d.Add(f2);

            f5 = NewField("f5", "this field has docs and freqs", ft2);
            d.Add(f5);

            f8 = NewField("f8", "this field has docs and freqs", ft2);
            d.Add(f8);

            // f3, f6, f9: docs and freqs and positions
            f3 = NewField("f3", "this field has docs and freqs and positions", ft3);
            d.Add(f3);

            f6 = NewField("f6", "this field has docs and freqs and positions", ft3);
            d.Add(f6);

            f9 = NewField("f9", "this field has docs and freqs and positions", ft3);
            d.Add(f9);

            writer.AddDocument(d);

            // force merge
            writer.ForceMerge(1);
            // flush
            writer.Dispose();

            SegmentReader reader = GetOnlySegmentReader(DirectoryReader.Open(ram));
            FieldInfos    fi     = reader.FieldInfos;

            // docs + docs = docs
            Assert.AreEqual(IndexOptions.DOCS_ONLY, fi.FieldInfo("f1").IndexOptions);
            // docs + docs/freqs = docs
            Assert.AreEqual(IndexOptions.DOCS_ONLY, fi.FieldInfo("f2").IndexOptions);
            // docs + docs/freqs/pos = docs
            Assert.AreEqual(IndexOptions.DOCS_ONLY, fi.FieldInfo("f3").IndexOptions);
            // docs/freqs + docs = docs
            Assert.AreEqual(IndexOptions.DOCS_ONLY, fi.FieldInfo("f4").IndexOptions);
            // docs/freqs + docs/freqs = docs/freqs
            Assert.AreEqual(IndexOptions.DOCS_AND_FREQS, fi.FieldInfo("f5").IndexOptions);
            // docs/freqs + docs/freqs/pos = docs/freqs
            Assert.AreEqual(IndexOptions.DOCS_AND_FREQS, fi.FieldInfo("f6").IndexOptions);
            // docs/freqs/pos + docs = docs
            Assert.AreEqual(IndexOptions.DOCS_ONLY, fi.FieldInfo("f7").IndexOptions);
            // docs/freqs/pos + docs/freqs = docs/freqs
            Assert.AreEqual(IndexOptions.DOCS_AND_FREQS, fi.FieldInfo("f8").IndexOptions);
            // docs/freqs/pos + docs/freqs/pos = docs/freqs/pos
            Assert.AreEqual(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS, fi.FieldInfo("f9").IndexOptions);

            reader.Dispose();
            ram.Dispose();
        }
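        // The assertions above encode the coercion rule: when one field is indexed
        // with different IndexOptions across documents, the merged field keeps the
        // weakest option seen. Minimal sketch (the "body" field name is made up):
        // pin positions explicitly on every document if phrase queries must survive
        // merges.
        FieldType bodyType = new FieldType(TextField.TYPE_NOT_STORED)
        {
            IndexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS
        };
        Document docWithPositions = new Document();
        docWithPositions.Add(new Field("body", "text that needs phrase queries", bodyType));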
        // Delete by query
        private static long ApplyQueryDeletes(IEnumerable<QueryAndLimit> queriesIter, ReadersAndUpdates rld, SegmentReader reader)
        {
            long delCount = 0;
            AtomicReaderContext readerContext = reader.AtomicContext;
            bool any = false;

            foreach (QueryAndLimit ent in queriesIter)
            {
                Query    query = ent.Query;
                int      limit = ent.Limit;
                DocIdSet docs  = (new QueryWrapperFilter(query)).GetDocIdSet(readerContext, reader.LiveDocs);
                if (docs != null)
                {
                    DocIdSetIterator it = docs.GetIterator();
                    if (it != null)
                    {
                        while (true)
                        {
                            int doc = it.NextDoc();
                            if (doc >= limit)
                            {
                                break;
                            }

                            if (!any)
                            {
                                rld.InitWritableLiveDocs();
                                any = true;
                            }

                            if (rld.Delete(doc))
                            {
                                delCount++;
                            }
                        }
                    }
                }
            }

            return(delCount);
        }
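        // Context sketch, not part of the method above: the QueryAndLimit entries it
        // consumes are buffered by query-based deletes such as this one (the "status"
        // field and value are made up).
        static void DeleteDrafts(IndexWriter writer)
        {
            // every document matching the query is dropped on the next applyDeletes pass
            writer.DeleteDocuments(new TermQuery(new Term("status", "draft")));
            writer.Commit();
        }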
Example #54
 /// <summary>
 /// Create new <see cref="SegmentReader"/> sharing core from a previous
 /// <see cref="SegmentReader"/> and loading new live docs from a new
 /// deletes file. Used by <see cref="DirectoryReader.OpenIfChanged(DirectoryReader)"/>.
 /// </summary>
 internal SegmentReader(SegmentCommitInfo si, SegmentReader sr)
     : this(si, sr, si.Info.Codec.LiveDocsFormat.ReadLiveDocs(si.Info.Dir, si, IOContext.READ_ONCE), si.Info.DocCount - si.DelCount)
 {
 }
Example #55
 /// <summary> Test term vectors for a segment.</summary>
 private Status.TermVectorStatus TestTermVectors(SegmentInfo info, SegmentReader reader, System.Globalization.NumberFormatInfo format)
 {
     var status = new Status.TermVectorStatus();
     
     try
     {
         if (infoStream != null)
         {
             infoStream.Write("    test: term vectors........");
         }
         
         for (int j = 0; j < info.docCount; ++j)
         {
             if (!reader.IsDeleted(j))
             {
                 status.docCount++;
                 ITermFreqVector[] tfv = reader.GetTermFreqVectors(j);
                 if (tfv != null)
                 {
                     status.totVectors += tfv.Length;
                 }
             }
         }
         
         Msg(System.String.Format(format, "OK [{0:d} total vector count; avg {1:f} term/freq vector fields per doc]", new object[] { status.totVectors, (((float) status.totVectors) / status.docCount) }));
     }
     catch (System.Exception e)
     {
         Msg("ERROR [" + System.Convert.ToString(e.Message) + "]");
         status.error = e;
         if (infoStream != null)
         {
             infoStream.WriteLine(e.StackTrace);
         }
     }
     
     return status;
 }
        /// <summary>
        /// This constructor is only used for <see cref="DoOpenIfChanged(SegmentInfos)"/>.</summary>
        private static DirectoryReader Open(Directory directory, SegmentInfos infos, IList<AtomicReader> oldReaders, int termInfosIndexDivisor)
        {
            // we put the old SegmentReaders in a map, that allows us
            // to lookup a reader using its segment name
            IDictionary<string, int?> segmentReaders = new Dictionary<string, int?>();

            if (oldReaders != null)
            {
                // create a Map SegmentName->SegmentReader
                for (int i = 0, c = oldReaders.Count; i < c; i++)
                {
                    SegmentReader sr = (SegmentReader)oldReaders[i];
                    segmentReaders[sr.SegmentName] = Convert.ToInt32(i);
                }
            }

            SegmentReader[] newReaders = new SegmentReader[infos.Count];

            // remember which readers are shared between the old and the re-opened
            // DirectoryReader - we have to incRef those readers
            bool[] readerShared = new bool[infos.Count];

            for (int i = infos.Count - 1; i >= 0; i--)
            {
                // find SegmentReader for this segment
                int? oldReaderIndex;
                segmentReaders.TryGetValue(infos.Info(i).Info.Name, out oldReaderIndex);
                if (oldReaderIndex == null)
                {
                    // this is a new segment, no old SegmentReader can be reused
                    newReaders[i] = null;
                }
                else
                {
                    // there is an old reader for this segment - we'll try to reopen it
                    newReaders[i] = (SegmentReader)oldReaders[(int)oldReaderIndex];
                }

                bool      success = false;
                Exception prior   = null;
                try
                {
                    SegmentReader newReader;
                    if (newReaders[i] == null || infos.Info(i).Info.UseCompoundFile != newReaders[i].SegmentInfo.Info.UseCompoundFile)
                    {
                        // this is a new reader; in case we hit an exception we can close it safely
                        newReader       = new SegmentReader(infos.Info(i), termInfosIndexDivisor, IOContext.READ);
                        readerShared[i] = false;
                        newReaders[i]   = newReader;
                    }
                    else
                    {
                        if (newReaders[i].SegmentInfo.DelGen == infos.Info(i).DelGen && newReaders[i].SegmentInfo.FieldInfosGen == infos.Info(i).FieldInfosGen)
                        {
                            // No change; this reader will be shared between
                            // the old and the new one, so we must incRef
                            // it:
                            readerShared[i] = true;
                            newReaders[i].IncRef();
                        }
                        else
                        {
                            // there are changes to the reader, either liveDocs or DV updates
                            readerShared[i] = false;
                            // Steal the ref returned by SegmentReader ctor:
                            Debug.Assert(infos.Info(i).Info.Dir == newReaders[i].SegmentInfo.Info.Dir);
                            Debug.Assert(infos.Info(i).HasDeletions || infos.Info(i).HasFieldUpdates);
                            if (newReaders[i].SegmentInfo.DelGen == infos.Info(i).DelGen)
                            {
                                // only DV updates
                                newReaders[i] = new SegmentReader(infos.Info(i), newReaders[i], newReaders[i].LiveDocs, newReaders[i].NumDocs);
                            }
                            else
                            {
                                // both DV and liveDocs have changed
                                newReaders[i] = new SegmentReader(infos.Info(i), newReaders[i]);
                            }
                        }
                    }
                    success = true;
                }
                catch (Exception ex)
                {
                    prior = ex;
                }
                finally
                {
                    if (!success)
                    {
                        for (i++; i < infos.Count; i++)
                        {
                            if (newReaders[i] != null)
                            {
                                try
                                {
                                    if (!readerShared[i])
                                    {
                                        // this is a new subReader that is not used by the old one,
                                        // we can close it
                                        newReaders[i].Dispose();
                                    }
                                    else
                                    {
                                        // this subReader is also used by the old reader, so instead
                                        // closing we must decRef it
                                        newReaders[i].DecRef();
                                    }
                                }
                                catch (Exception t)
                                {
                                    if (prior == null)
                                    {
                                        prior = t;
                                    }
                                }
                            }
                        }
                    }
                    // throw the first exception
                    IOUtils.ReThrow(prior);
                }
            }
            return(new StandardDirectoryReader(directory, newReaders, null, infos, termInfosIndexDivisor, false));
        }
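        // Caller-side sketch of the reopen path that funnels into Open(...) above,
        // using the public Lucene.NET 4.8 API (variable names are made up):
        static DirectoryReader Refresh(DirectoryReader current)
        {
            DirectoryReader changed = DirectoryReader.OpenIfChanged(current);
            if (changed == null)
            {
                return current; // nothing committed since last open; keep shared readers
            }
            current.Dispose(); // segment readers still shared were IncRef'd by Open
            return changed;
        }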
Example #57
        /// <summary>Merges field infos and stored field values from all of the readers.</summary>
        /// <returns> The number of documents in all of the readers
        /// </returns>
        /// <throws>  CorruptIndexException if the index is corrupt </throws>
        /// <throws>  IOException if there is a low-level IO error </throws>
        private int MergeFields()
        {
            if (!mergeDocStores)
            {
                // When we are not merging by doc stores, that means
                // all segments were written as part of a single
                // autoCommit=false IndexWriter session, so their field
                // name -> number mapping are the same.  So, we start
                // with the fieldInfos of the last segment in this
                // case, to keep that numbering.
                SegmentReader sr = (SegmentReader)readers[readers.Count - 1];
                fieldInfos = (FieldInfos)sr.core.fieldInfos.Clone();
            }
            else
            {
                fieldInfos = new FieldInfos();                 // merge field names
            }

            for (System.Collections.IEnumerator iter = readers.GetEnumerator(); iter.MoveNext();)
            {
                IndexReader reader = (IndexReader)iter.Current;
                if (reader is SegmentReader)
                {
                    SegmentReader segmentReader       = (SegmentReader)reader;
                    FieldInfos    readerFieldInfos    = segmentReader.FieldInfos();
                    int           numReaderFieldInfos = readerFieldInfos.Size();
                    for (int j = 0; j < numReaderFieldInfos; j++)
                    {
                        FieldInfo fi = readerFieldInfos.FieldInfo(j);
                        fieldInfos.Add(fi.name, fi.isIndexed, fi.storeTermVector, fi.storePositionWithTermVector, fi.storeOffsetWithTermVector, !reader.HasNorms(fi.name), fi.storePayloads, fi.omitTermFreqAndPositions);
                    }
                }
                else
                {
                    AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.TERMVECTOR_WITH_POSITION_OFFSET), true, true, true, false, false);
                    AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.TERMVECTOR_WITH_POSITION), true, true, false, false, false);
                    AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.TERMVECTOR_WITH_OFFSET), true, false, true, false, false);
                    AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.TERMVECTOR), true, false, false, false, false);
                    AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.OMIT_TERM_FREQ_AND_POSITIONS), false, false, false, false, true);
                    AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.STORES_PAYLOADS), false, false, false, true, false);
                    AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.INDEXED), false, false, false, false, false);
                    fieldInfos.Add(reader.GetFieldNames(FieldOption.UNINDEXED), false);
                }
            }
            fieldInfos.Write(directory, segment + ".fnm");

            int docCount = 0;

            SetMatchingSegmentReaders();

            if (mergeDocStores)
            {
                // for merging we don't want to compress/uncompress the data, so to tell the FieldsReader that we're
                // in  merge mode, we use this FieldSelector
                FieldSelector fieldSelectorMerge = new AnonymousClassFieldSelector(this);

                // merge field values
                FieldsWriter fieldsWriter = new FieldsWriter(directory, segment, fieldInfos);

                try
                {
                    int idx = 0;
                    for (System.Collections.IEnumerator iter = readers.GetEnumerator(); iter.MoveNext();)
                    {
                        IndexReader   reader = (IndexReader)iter.Current;
                        SegmentReader matchingSegmentReader = matchingSegmentReaders[idx++];
                        FieldsReader  matchingFieldsReader  = null;
                        if (matchingSegmentReader != null)
                        {
                            FieldsReader fieldsReader = matchingSegmentReader.GetFieldsReader();
                            if (fieldsReader != null && fieldsReader.CanReadRawDocs())
                            {
                                matchingFieldsReader = fieldsReader;
                            }
                        }
                        if (reader.HasDeletions())
                        {
                            docCount += CopyFieldsWithDeletions(fieldSelectorMerge, fieldsWriter, reader, matchingFieldsReader);
                        }
                        else
                        {
                            docCount += CopyFieldsNoDeletions(fieldSelectorMerge, fieldsWriter, reader, matchingFieldsReader);
                        }
                    }
                }
                finally
                {
                    fieldsWriter.Close();
                }

                System.String fileName      = segment + "." + IndexFileNames.FIELDS_INDEX_EXTENSION;
                long          fdxFileLength = directory.FileLength(fileName);

                if (4 + ((long)docCount) * 8 != fdxFileLength)
                {
                    // This is most likely a bug in Sun JRE 1.6.0_04/_05;
                    // we detect that the bug has struck, here, and
                    // throw an exception to prevent the corruption from
                    // entering the index.  See LUCENE-1282 for
                    // details.
                    throw new System.SystemException("mergeFields produced an invalid result: docCount is " + docCount + " but fdx file size is " + fdxFileLength + " file=" + fileName + " file exists?=" + directory.FileExists(fileName) + "; now aborting this merge to prevent index corruption");
                }
            }
            // If we are skipping the doc stores, that means there
            // are no deletions in any of these segments, so we
            // just sum numDocs() of each segment to get total docCount
            else
            {
                for (System.Collections.IEnumerator iter = readers.GetEnumerator(); iter.MoveNext();)
                {
                    docCount += ((IndexReader)iter.Current).NumDocs();
                }
            }

            return(docCount);
        }
Example #58
        /// <summary> Test field norms.</summary>
        private Status.FieldNormStatus TestFieldNorms(IEnumerable <string> fieldNames, SegmentReader reader, IState state)
        {
            var status = new Status.FieldNormStatus();

            try
            {
                // Test Field Norms
                if (infoStream != null)
                {
                    infoStream.Write("    test: field norms.........");
                }

                var b = new byte[reader.MaxDoc];
                foreach (string fieldName in fieldNames)
                {
                    if (reader.HasNorms(fieldName, state))
                    {
                        reader.Norms(fieldName, b, 0, state);
                        ++status.totFields;
                    }
                }

                Msg("OK [" + status.totFields + " fields]");
            }
            catch (System.Exception e)
            {
                Msg("ERROR [" + System.Convert.ToString(e.Message) + "]");
                status.error = e;
                if (infoStream != null)
                {
                    infoStream.WriteLine(e.StackTrace);
                }
            }

            return status;
        }
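
        // A minimal usage sketch of the norms API exercised above, assuming
        // the same reader signature (HasNorms/Norms taking an IState handle).
        // The field name "body" is hypothetical.
        private static byte[] ReadNormsForField(SegmentReader reader, IState state)
        {
            var norms = new byte[reader.MaxDoc]; // one norm byte per document
            if (reader.HasNorms("body", state))
            {
                reader.Norms("body", norms, 0, state);
            }
            return norms;
        }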
Example #59
		public virtual void  TestMerge()
		{
			//System.out.println("----------------TestMerge------------------");
			SegmentMerger merger = new SegmentMerger(mergedDir, mergedSegment, false);
			merger.Add(reader1);
			merger.Add(reader2);
			try
			{
				int docsMerged = merger.Merge();
				merger.CloseReaders();
				Assert.IsTrue(docsMerged == 2);
				//Should be able to open a new SegmentReader against the new directory
				SegmentReader mergedReader = new SegmentReader(new SegmentInfo(mergedSegment, docsMerged, mergedDir));
				Assert.IsTrue(mergedReader != null);
				Assert.IsTrue(mergedReader.NumDocs() == 2);
				Document newDoc1 = mergedReader.Document(0);
				Assert.IsTrue(newDoc1 != null);
				//There are 2 unstored fields on the document
				Assert.IsTrue(DocHelper.NumFields(newDoc1) == DocHelper.NumFields(doc1) - 2);
				Document newDoc2 = mergedReader.Document(1);
				Assert.IsTrue(newDoc2 != null);
				Assert.IsTrue(DocHelper.NumFields(newDoc2) == DocHelper.NumFields(doc2) - 2);
				
				TermDocs termDocs = mergedReader.TermDocs(new Term(DocHelper.TEXT_FIELD_2_KEY, "Field"));
				Assert.IsTrue(termDocs != null);
				Assert.IsTrue(termDocs.Next() == true);
				
				System.Collections.ICollection stored = mergedReader.GetIndexedFieldNames(true);
				Assert.IsTrue(stored != null);
				//System.out.println("stored size: " + stored.size());
				Assert.IsTrue(stored.Count == 2);
				
				TermFreqVector vector = mergedReader.GetTermFreqVector(0, DocHelper.TEXT_FIELD_2_KEY);
				Assert.IsTrue(vector != null);
				System.String[] terms = vector.GetTerms();
				Assert.IsTrue(terms != null);
				//System.out.println("Terms size: " + terms.length);
				Assert.IsTrue(terms.Length == 3);
				int[] freqs = vector.GetTermFrequencies();
				Assert.IsTrue(freqs != null);
				//System.out.println("Freqs size: " + freqs.length);
				
				for (int i = 0; i < terms.Length; i++)
				{
					System.String term = terms[i];
					int freq = freqs[i];
					//System.out.println("Term: " + term + " Freq: " + freq);
					Assert.IsTrue(DocHelper.FIELD_2_TEXT.IndexOf(term) != -1);
					Assert.IsTrue(DocHelper.FIELD_2_FREQS[i] == freq);
				}
			}
			catch (System.IO.IOException e)
			{
                System.Console.Error.WriteLine(e.StackTrace);
				Assert.IsTrue(false);
			}
			//System.out.println("---------------------end TestMerge-------------------");
		}
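
		// A minimal sketch of walking all postings for the term queried above
		// with the classic TermDocs API; Doc()/Freq() follow the method-style
		// calls used elsewhere in this Lucene version, so treat their exact
		// shape as an assumption.
		private void WalkTermDocs(SegmentReader reader)
		{
			TermDocs termDocs = reader.TermDocs(new Term(DocHelper.TEXT_FIELD_2_KEY, "Field"));
			while (termDocs.Next())
			{
				int doc = termDocs.Doc();   // document number within this segment
				int freq = termDocs.Freq(); // occurrences of the term in that doc
			}
			termDocs.Close();
		}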
Example #60
        public virtual void TestPayloadFieldBit()
        {
            Directory       ram      = NewDirectory();
            PayloadAnalyzer analyzer = new PayloadAnalyzer();
            IndexWriter     writer   = new IndexWriter(ram, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
            Document        d        = new Document();

            // this field won't have any payloads
            d.Add(NewTextField("f1", "this field has no payloads", Field.Store.NO));
            // this field will have payloads in all docs, however not for all term positions,
            // so this field is used to check if the DocumentWriter correctly enables the payloads bit
            // even if only some term positions have payloads
            d.Add(NewTextField("f2", "this field has payloads in all docs", Field.Store.NO));
            d.Add(NewTextField("f2", "this field has payloads in all docs NO PAYLOAD", Field.Store.NO));
            // this field is used to verify if the SegmentMerger enables payloads for a field if it has payloads
            // enabled in only some documents
            d.Add(NewTextField("f3", "this field has payloads in some docs", Field.Store.NO));
            // only add payload data for field f2
#pragma warning disable 612, 618
            analyzer.SetPayloadData("f2", "somedata".GetBytes(IOUtils.CHARSET_UTF_8), 0, 1);
#pragma warning restore 612, 618
            writer.AddDocument(d);
            // flush
            writer.Dispose();

            SegmentReader reader = GetOnlySegmentReader(DirectoryReader.Open(ram));
            FieldInfos    fi     = reader.FieldInfos;
            Assert.IsFalse(fi.FieldInfo("f1").HasPayloads, "Payload field bit should not be set.");
            Assert.IsTrue(fi.FieldInfo("f2").HasPayloads, "Payload field bit should be set.");
            Assert.IsFalse(fi.FieldInfo("f3").HasPayloads, "Payload field bit should not be set.");
            reader.Dispose();

            // now we add another document which has payloads for field f3 and verify if the SegmentMerger
            // enabled payloads for that field
            analyzer = new PayloadAnalyzer(); // Clear payload state for each field
            writer   = new IndexWriter(ram, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetOpenMode(OpenMode.CREATE));
            d        = new Document();
            d.Add(NewTextField("f1", "this field has no payloads", Field.Store.NO));
            d.Add(NewTextField("f2", "this field has payloads in all docs", Field.Store.NO));
            d.Add(NewTextField("f2", "this field has payloads in all docs", Field.Store.NO));
            d.Add(NewTextField("f3", "this field has payloads in some docs", Field.Store.NO));
            // add payload data for field f2 and f3
#pragma warning disable 612, 618
            analyzer.SetPayloadData("f2", "somedata".GetBytes(IOUtils.CHARSET_UTF_8), 0, 1);
            analyzer.SetPayloadData("f3", "somedata".GetBytes(IOUtils.CHARSET_UTF_8), 0, 3);
#pragma warning restore 612, 618
            writer.AddDocument(d);

            // force merge
            writer.ForceMerge(1);
            // flush
            writer.Dispose();

            reader = GetOnlySegmentReader(DirectoryReader.Open(ram));
            fi     = reader.FieldInfos;
            Assert.IsFalse(fi.FieldInfo("f1").HasPayloads, "Payload field bit should not be set.");
            Assert.IsTrue(fi.FieldInfo("f2").HasPayloads, "Payload field bit should be set.");
            Assert.IsTrue(fi.FieldInfo("f3").HasPayloads, "Payload field bit should be set.");
            reader.Dispose();
            ram.Dispose();
        }
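
        // A minimal sketch of how an analyzer can attach payloads to tokens,
        // in the spirit of the PayloadAnalyzer used above (whose source is not
        // shown here). Assumes Lucene.NET 4.8's TokenFilter/IPayloadAttribute
        // APIs; the filter name is hypothetical.
        public sealed class FixedPayloadFilter : TokenFilter
        {
            private readonly IPayloadAttribute payloadAtt;
            private readonly BytesRef payload;

            public FixedPayloadFilter(TokenStream input, byte[] data) : base(input)
            {
                payloadAtt = AddAttribute<IPayloadAttribute>();
                payload = new BytesRef(data);
            }

            public override bool IncrementToken()
            {
                if (!m_input.IncrementToken())
                {
                    return false;
                }
                payloadAtt.Payload = payload; // same payload at every term position
                return true;
            }
        }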