Example No. 1
        public void  CloseDocStore(SegmentWriteState state)
        {
            lock (this)
            {
                int inc = state.numDocsInStore - lastDocID;
                if (inc > 0)
                {
                    InitFieldsWriter();
                    Fill(state.numDocsInStore - docWriter.GetDocStoreOffset());
                }

                if (fieldsWriter != null)
                {
                    fieldsWriter.Close();
                    fieldsWriter = null;
                    lastDocID    = 0;
                    System.Diagnostics.Debug.Assert(state.docStoreSegmentName != null);
                    SupportClass.CollectionsHelper.AddIfNotContains(state.flushedFiles, state.docStoreSegmentName + "." + IndexFileNames.FIELDS_EXTENSION);
                    SupportClass.CollectionsHelper.AddIfNotContains(state.flushedFiles, state.docStoreSegmentName + "." + IndexFileNames.FIELDS_INDEX_EXTENSION);

                    state.docWriter.RemoveOpenFile(state.docStoreSegmentName + "." + IndexFileNames.FIELDS_EXTENSION);
                    state.docWriter.RemoveOpenFile(state.docStoreSegmentName + "." + IndexFileNames.FIELDS_INDEX_EXTENSION);

                    System.String fileName = state.docStoreSegmentName + "." + IndexFileNames.FIELDS_INDEX_EXTENSION;

                    if (4 + ((long)state.numDocsInStore) * 8 != state.directory.FileLength(fileName))
                    {
                        throw new System.SystemException("after flush: fdx size mismatch: " + state.numDocsInStore + " docs vs " + state.directory.FileLength(fileName) + " length in bytes of " + fileName + " file exists?=" + state.directory.FileExists(fileName));
                    }
                }
            }
        }
        internal override void closeDocStore(DocumentsWriter.FlushState state)
        {
            lock (this)
            {
                int inc = state.numDocsInStore - lastDocID;
                if (inc > 0)
                {
                    initFieldsWriter();
                    fill(state.numDocsInStore - docWriter.GetDocStoreOffset());
                }

                if (fieldsWriter != null)
                {
                    fieldsWriter.Close();
                    fieldsWriter = null;
                    lastDocID    = 0;
                    System.Diagnostics.Debug.Assert(state.docStoreSegmentName != null);

                    string fdtFile = state.docStoreSegmentName + "." + IndexFileNames.FIELDS_EXTENSION;
                    string fdxFile = state.docStoreSegmentName + "." + IndexFileNames.FIELDS_INDEX_EXTENSION;

                    state.flushedFiles[fdtFile] = fdtFile;
                    state.flushedFiles[fdxFile] = fdxFile;

                    state.docWriter.RemoveOpenFile(fdtFile);
                    state.docWriter.RemoveOpenFile(fdxFile);

                    if (4 + state.numDocsInStore * 8 != state.directory.FileLength(fdxFile))
                    {
                        throw new System.SystemException("after flush: fdx size mismatch: " + state.numDocsInStore + " docs vs " + state.directory.FileLength(fdxFile) + " length in bytes of " + fdxFile);
                    }
                }
            }
        }
        public void CloseDocStore(SegmentWriteState state)
        {
            lock (this)
            {
                int inc = state.numDocsInStore - lastDocID;
                if (inc > 0)
                {
                    InitFieldsWriter();
                    Fill(state.numDocsInStore - docWriter.GetDocStoreOffset());
                }

                if (fieldsWriter != null)
                {
                    fieldsWriter.Close();
                    fieldsWriter = null;
                    lastDocID = 0;
                    System.Diagnostics.Debug.Assert(state.docStoreSegmentName != null);
                    SupportClass.CollectionsHelper.AddIfNotContains(state.flushedFiles, state.docStoreSegmentName + "." + IndexFileNames.FIELDS_EXTENSION);
                    SupportClass.CollectionsHelper.AddIfNotContains(state.flushedFiles, state.docStoreSegmentName + "." + IndexFileNames.FIELDS_INDEX_EXTENSION);

                    state.docWriter.RemoveOpenFile(state.docStoreSegmentName + "." + IndexFileNames.FIELDS_EXTENSION);
                    state.docWriter.RemoveOpenFile(state.docStoreSegmentName + "." + IndexFileNames.FIELDS_INDEX_EXTENSION);

                    System.String fileName = state.docStoreSegmentName + "." + IndexFileNames.FIELDS_INDEX_EXTENSION;

                    if (4 + ((long) state.numDocsInStore) * 8 != state.directory.FileLength(fileName))
                        throw new System.SystemException("after flush: fdx size mismatch: " + state.numDocsInStore + " docs vs " + state.directory.FileLength(fileName) + " length in bytes of " + fileName + " file exists?=" + state.directory.FileExists(fileName));
                }
            }
        }
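
Both CloseDocStore variants above end with the same sanity check: after a flush, the stored-fields index file (.fdx) is expected to hold a 4-byte format header followed by one 8-byte pointer per stored document, so its length must equal 4 + numDocsInStore * 8. A minimal stand-alone sketch of that check, with illustrative names (not Lucene.NET API):

// Sketch of the .fdx size sanity check used in the examples above.
// Assumes the layout implied by the check itself: 4 header bytes,
// then one 8-byte file pointer per stored document.
public static class FdxSizeCheck
{
    private const long HeaderLengthBytes  = 4; // illustrative constant
    private const long BytesPerDocPointer = 8; // illustrative constant

    public static long ExpectedLength(int numDocsInStore)
    {
        return HeaderLengthBytes + (long) numDocsInStore * BytesPerDocPointer;
    }

    public static void Verify(int numDocsInStore, long actualFdxLength, string fileName)
    {
        if (ExpectedLength(numDocsInStore) != actualFdxLength)
        {
            throw new System.SystemException("after flush: fdx size mismatch: " + numDocsInStore +
                " docs vs " + actualFdxLength + " bytes in " + fileName);
        }
    }
}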
Example No. 4
        private int CopyFieldsNoDeletions(FieldsWriter fieldsWriter, IndexReader reader, FieldsReader matchingFieldsReader)
        {
            int maxDoc   = reader.MaxDoc;
            int docCount = 0;

            if (matchingFieldsReader != null)
            {
                // We can bulk-copy because the fieldInfos are "congruent"
                while (docCount < maxDoc)
                {
                    int        len    = System.Math.Min(MAX_RAW_MERGE_DOCS, maxDoc - docCount);
                    IndexInput stream = matchingFieldsReader.RawDocs(rawDocLengths, docCount, len);
                    fieldsWriter.AddRawDocuments(stream, rawDocLengths, len);
                    docCount += len;
                    checkAbort.Work(300 * len);
                }
            }
            else
            {
                for (; docCount < maxDoc; docCount++)
                {
                    // NOTE: it's very important to first assign to doc then pass it to
                    // termVectorsWriter.addAllDocVectors; see LUCENE-1282
                    Document doc = reader.Document(docCount);
                    fieldsWriter.AddDocument(doc);
                    checkAbort.Work(300);
                }
            }
            return(docCount);
        }
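
When a matching FieldsReader is available, the loop above bulk-copies stored fields in batches of at most MAX_RAW_MERGE_DOCS documents. A stand-alone sketch of just that chunking logic; the constant value and the Console output are illustrative stand-ins for RawDocs/AddRawDocuments:

// Sketch of the no-deletions bulk-copy batching shown above.
public static class BulkCopyChunking
{
    private const int MaxRawMergeDocs = 4192; // illustrative batch cap

    public static int CopyAll(int maxDoc)
    {
        int docCount = 0;
        while (docCount < maxDoc)
        {
            // Copy the next batch of up to MaxRawMergeDocs documents.
            int len = System.Math.Min(MaxRawMergeDocs, maxDoc - docCount);
            System.Console.WriteLine("bulk copy docs [" + docCount + ", " + (docCount + len) + ")");
            docCount += len;
        }
        return docCount;
    }
}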
Example No. 5
        private int CopyFieldsWithDeletions(FieldsWriter fieldsWriter, IndexReader reader, FieldsReader matchingFieldsReader)
        {
            int docCount = 0;
            int maxDoc   = reader.MaxDoc;

            if (matchingFieldsReader != null)
            {
                // We can bulk-copy because the fieldInfos are "congruent"
                for (int j = 0; j < maxDoc;)
                {
                    if (reader.IsDeleted(j))
                    {
                        // skip deleted docs
                        ++j;
                        continue;
                    }
                    // We can optimize this case (doing a bulk byte copy) since the field
                    // numbers are identical
                    int start = j, numDocs = 0;
                    do
                    {
                        j++;
                        numDocs++;
                        if (j >= maxDoc)
                        {
                            break;
                        }
                        if (reader.IsDeleted(j))
                        {
                            j++;
                            break;
                        }
                    } while (numDocs < MAX_RAW_MERGE_DOCS);

                    IndexInput stream = matchingFieldsReader.RawDocs(rawDocLengths, start, numDocs);
                    fieldsWriter.AddRawDocuments(stream, rawDocLengths, numDocs);
                    docCount += numDocs;
                    checkAbort.Work(300 * numDocs);
                }
            }
            else
            {
                for (int j = 0; j < maxDoc; j++)
                {
                    if (reader.IsDeleted(j))
                    {
                        // skip deleted docs
                        continue;
                    }
                    // NOTE: it's very important to first assign to doc then pass it to
                    // termVectorsWriter.addAllDocVectors; see LUCENE-1282
                    Document doc = reader.Document(j);
                    fieldsWriter.AddDocument(doc);
                    docCount++;
                    checkAbort.Work(300);
                }
            }
            return(docCount);
        }
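
With deletions present, the bulk path instead scans for maximal runs of consecutive live (non-deleted) documents, capping each run at MAX_RAW_MERGE_DOCS, and copies one run at a time. A self-contained sketch of that run-finding loop over a plain deletion bitmap (illustrative helper, not Lucene.NET API):

// Returns (start, length) pairs for maximal runs of consecutive live docs,
// each capped at maxRunLength, mirroring the do/while loop above.
public static class LiveDocRuns
{
    public static System.Collections.Generic.List<System.Tuple<int, int>> FindRuns(bool[] deleted, int maxRunLength)
    {
        var runs = new System.Collections.Generic.List<System.Tuple<int, int>>();
        int maxDoc = deleted.Length;
        for (int j = 0; j < maxDoc; )
        {
            if (deleted[j])
            {
                j++; // skip deleted docs
                continue;
            }
            int start = j, numDocs = 0;
            do
            {
                j++;
                numDocs++;
                if (j >= maxDoc)
                    break;
                if (deleted[j])
                {
                    j++;
                    break;
                }
            } while (numDocs < maxRunLength);
            runs.Add(System.Tuple.Create(start, numDocs));
        }
        return runs;
    }
}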
Example No. 6
        public /*internal*/ void  AddDocument(System.String segment, Document doc)
        {
            // write field names
            fieldInfos = new FieldInfos();
            fieldInfos.Add(doc);
            fieldInfos.Write(directory, segment + ".fnm");

            // write field values
            FieldsWriter fieldsWriter = new FieldsWriter(directory, segment, fieldInfos);

            try
            {
                fieldsWriter.AddDocument(doc);
            }
            finally
            {
                fieldsWriter.Close();
            }

            // invert doc into postingTable
            postingTable.Clear();                        // clear postingTable
            fieldLengths   = new int[fieldInfos.Size()]; // init fieldLengths
            fieldPositions = new int[fieldInfos.Size()]; // init fieldPositions
            fieldOffsets   = new int[fieldInfos.Size()]; // init fieldOffsets

            fieldBoosts = new float[fieldInfos.Size()];  // init fieldBoosts
            float boost = doc.GetBoost();

            for (int i = 0; i < fieldBoosts.Length; i++)
            {
                fieldBoosts[i] = boost;
            }

            InvertDocument(doc);

            // sort postingTable into an array
            Posting[] postings = SortPostingTable();

            /*
             * for (int i = 0; i < postings.length; i++) {
             * Posting posting = postings[i];
             * System.out.print(posting.term);
             * System.out.print(" freq=" + posting.freq);
             * System.out.print(" pos=");
             * System.out.print(posting.positions[0]);
             * for (int j = 1; j < posting.freq; j++)
             * System.out.print("," + posting.positions[j]);
             * System.out.println("");
             * }
             */

            // write postings
            WritePostings(postings, segment);

            // write norms of indexed fields
            WriteNorms(segment);
        }
Example No. 7
		public void  AddDocument(System.String segment, Document doc)
		{
			// write field names
			fieldInfos = new FieldInfos();
			fieldInfos.Add(doc);
			fieldInfos.Write(directory, segment + ".fnm");
			
			// write field values
			FieldsWriter fieldsWriter = new FieldsWriter(directory, segment, fieldInfos);
			try
			{
				fieldsWriter.AddDocument(doc);
			}
			finally
			{
				fieldsWriter.Close();
			}
			
			// invert doc into postingTable
			postingTable.Clear(); // clear postingTable
			fieldLengths = new int[fieldInfos.Size()]; // init fieldLengths
			fieldPositions = new int[fieldInfos.Size()]; // init fieldPositions
			fieldOffsets = new int[fieldInfos.Size()]; // init fieldOffsets
			
			fieldBoosts = new float[fieldInfos.Size()]; // init fieldBoosts
			float boost = doc.GetBoost();
			for (int i = 0; i < fieldBoosts.Length; i++)
			{
				fieldBoosts[i] = boost;
			}
			
			InvertDocument(doc);
			
			// sort postingTable into an array
			Posting[] postings = SortPostingTable();
			
			/*
			for (int i = 0; i < postings.length; i++) {
			Posting posting = postings[i];
			System.out.print(posting.term);
			System.out.print(" freq=" + posting.freq);
			System.out.print(" pos=");
			System.out.print(posting.positions[0]);
			for (int j = 1; j < posting.freq; j++)
			System.out.print("," + posting.positions[j]);
			System.out.println("");
			}
			*/
			
			// write postings
			WritePostings(postings, segment);
			
			// write norms of indexed fields
			WriteNorms(segment);
		}
		private void  InitFieldsWriter()
		{
			if (fieldsWriter == null)
			{
				System.String docStoreSegment = docWriter.DocStoreSegment;
				if (docStoreSegment != null)
				{
					System.Diagnostics.Debug.Assert(docStoreSegment != null);
					fieldsWriter = new FieldsWriter(docWriter.directory, docStoreSegment, fieldInfos);
					docWriter.AddOpenFile(docStoreSegment + "." + IndexFileNames.FIELDS_EXTENSION);
					docWriter.AddOpenFile(docStoreSegment + "." + IndexFileNames.FIELDS_INDEX_EXTENSION);
					lastDocID = 0;
				}
			}
		}
Example No. 9
 private void  InitFieldsWriter()
 {
     if (fieldsWriter == null)
     {
         System.String docStoreSegment = docWriter.GetDocStoreSegment();
         if (docStoreSegment != null)
         {
             System.Diagnostics.Debug.Assert(docStoreSegment != null);
             fieldsWriter = new FieldsWriter(docWriter.directory, docStoreSegment, fieldInfos);
             docWriter.AddOpenFile(docStoreSegment + "." + IndexFileNames.FIELDS_EXTENSION);
             docWriter.AddOpenFile(docStoreSegment + "." + IndexFileNames.FIELDS_INDEX_EXTENSION);
             lastDocID = 0;
         }
     }
 }
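
Both InitFieldsWriter variants create the writer lazily, and only once a doc-store segment name exists; they also register the .fdt/.fdx file names as open so they can later be removed from tracking. A stand-alone sketch of that pattern with placeholder types (the object field stands in for FieldsWriter, and the extensions are written out literally rather than taken from IndexFileNames):

// Lazy, idempotent initialization plus open-file bookkeeping, as in the examples above.
public class LazyFieldsWriterInit
{
    private object fieldsWriter; // placeholder for FieldsWriter
    private readonly System.Collections.Generic.HashSet<string> openFiles =
        new System.Collections.Generic.HashSet<string>();

    public int LastDocID { get; private set; }

    public void InitFieldsWriter(string docStoreSegment)
    {
        if (fieldsWriter != null)
            return;                    // already initialized
        if (docStoreSegment == null)
            return;                    // no doc store segment assigned yet
        fieldsWriter = new object();   // the examples construct a FieldsWriter here
        openFiles.Add(docStoreSegment + ".fdt");
        openFiles.Add(docStoreSegment + ".fdx");
        LastDocID = 0;
    }
}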
Example No. 10
        /// <summary> </summary>
        /// <returns> The number of documents in all of the readers
        /// </returns>
        /// <throws>  IOException </throws>
        private int MergeFields()
        {
            fieldInfos = new FieldInfos();             // merge field names
            int docCount = 0;

            for (int i = 0; i < readers.Count; i++)
            {
                IndexReader reader = (IndexReader)readers[i];
                AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET), true, true, true);
                AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION), true, true, false);
                AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET), true, false, true);
                AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR), true, false, false);
                AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.INDEXED), false, false, false);
                fieldInfos.Add(reader.GetFieldNames(IndexReader.FieldOption.UNINDEXED), false);
            }
            fieldInfos.Write(directory, segment + ".fnm");

            FieldsWriter fieldsWriter = new FieldsWriter(directory, segment, fieldInfos);

            // for merging we don't want to compress/uncompress the data, so to tell the FieldsReader that we're
            // in  merge mode, we use this FieldSelector
            FieldSelector fieldSelectorMerge = new AnonymousClassFieldSelector(this);

            try
            {
                for (int i = 0; i < readers.Count; i++)
                {
                    IndexReader reader = (IndexReader)readers[i];
                    int         maxDoc = reader.MaxDoc();
                    for (int j = 0; j < maxDoc; j++)
                    {
                        if (!reader.IsDeleted(j))
                        {
                            // skip deleted docs
                            fieldsWriter.AddDocument(reader.Document(j, fieldSelectorMerge));
                            docCount++;
                        }
                    }
                }
            }
            finally
            {
                fieldsWriter.Close();
            }
            return(docCount);
        }
Example No. 11
 internal void  Abort()
 {
     lock (this)
     {
         if (fieldsWriter != null)
         {
             try
             {
                 fieldsWriter.Close();
             }
             catch (System.Exception)
             {
             }
             fieldsWriter = null;
             lastDocID    = 0;
         }
     }
 }
 internal override void Abort()
 {
     lock (this)
     {
         if (fieldsWriter != null)
         {
             try
             {
                 fieldsWriter.Close();
             }
             catch (System.Exception)
             {
             }
             fieldsWriter = null;
             lastDocID = 0;
         }
     }
 }
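
Both Abort variants close the writer best-effort and deliberately swallow any exception, because the buffered doc-store state is being discarded anyway. A small sketch of that close-quietly pattern (illustrative helper, not part of Lucene.NET):

// Best-effort close used during abort: release resources, ignore failures.
public static class AbortHelpers
{
    public static void CloseQuietly(System.IDisposable writer)
    {
        if (writer == null)
            return;
        try
        {
            writer.Dispose();
        }
        catch (System.Exception)
        {
            // Swallowed on purpose: the surrounding state is being reset.
        }
    }
}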
Example No. 13
        /// <summary> </summary>
        /// <returns> The number of documents in all of the readers
        /// </returns>
        /// <throws>  IOException </throws>
        private int MergeFields()
        {
            fieldInfos = new FieldInfos();             // merge field names
            int docCount = 0;

            for (int i = 0; i < readers.Count; i++)
            {
                IndexReader reader = (IndexReader)readers[i];
                AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET), true, true, true);
                AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION), true, true, false);
                AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET), true, false, true);
                AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR), true, false, false);
                AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.INDEXED), false, false, false);
                fieldInfos.Add(reader.GetFieldNames(IndexReader.FieldOption.UNINDEXED), false);
            }
            fieldInfos.Write(directory, segment + ".fnm");

            FieldsWriter fieldsWriter = new FieldsWriter(directory, segment, fieldInfos);

            try
            {
                for (int i = 0; i < readers.Count; i++)
                {
                    IndexReader reader = (IndexReader)readers[i];
                    int         maxDoc = reader.MaxDoc();
                    for (int j = 0; j < maxDoc; j++)
                    {
                        if (!reader.IsDeleted(j))
                        {
                            // skip deleted docs
                            fieldsWriter.AddDocument(reader.Document(j));
                            docCount++;
                        }
                    }
                }
            }
            finally
            {
                fieldsWriter.Close();
            }
            return(docCount);
        }
Example No. 14
        /// <summary> </summary>
        /// <returns> The number of documents in all of the readers
        /// </returns>
        /// <throws>  CorruptIndexException if the index is corrupt </throws>
        /// <throws>  IOException if there is a low-level IO error </throws>
        private int MergeFields()
        {
            if (!mergeDocStores)
            {
                // When we are not merging by doc stores, that means
                // all segments were written as part of a single
                // autoCommit=false IndexWriter session, so their field
                // name -> number mapping are the same.  So, we start
                // with the fieldInfos of the last segment in this
                // case, to keep that numbering.
                SegmentReader sr = (SegmentReader)readers[readers.Count - 1];
                fieldInfos = (FieldInfos)sr.fieldInfos.Clone();
            }
            else
            {
                fieldInfos = new FieldInfos();                 // merge field names
            }

            for (int i = 0; i < readers.Count; i++)
            {
                IndexReader reader = (IndexReader)readers[i];
                if (reader is SegmentReader)
                {
                    SegmentReader segmentReader = (SegmentReader)reader;
                    for (int j = 0; j < segmentReader.GetFieldInfos().Size(); j++)
                    {
                        FieldInfo fi = segmentReader.GetFieldInfos().FieldInfo(j);
                        fieldInfos.Add(fi.name, fi.isIndexed, fi.storeTermVector, fi.storePositionWithTermVector, fi.storeOffsetWithTermVector, !reader.HasNorms(fi.name), fi.storePayloads, fi.omitTf);
                    }
                }
                else
                {
                    AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET), true, true, true, false, false);
                    AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION), true, true, false, false, false);
                    AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET), true, false, true, false, false);
                    AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR), true, false, false, false, false);
                    AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.OMIT_TF), false, false, false, false, true);
                    AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.STORES_PAYLOADS), false, false, false, true, false);
                    AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.INDEXED), false, false, false, false, false);
                    fieldInfos.Add(reader.GetFieldNames(IndexReader.FieldOption.UNINDEXED), false);
                }
            }
            fieldInfos.Write(directory, segment + ".fnm");

            int docCount = 0;

            SetMatchingSegmentReaders();

            if (mergeDocStores)
            {
                // for merging we don't want to compress/uncompress the data, so to tell the FieldsReader that we're
                // in  merge mode, we use this FieldSelector
                FieldSelector fieldSelectorMerge = new AnonymousClassFieldSelector(this);

                // merge field values
                FieldsWriter fieldsWriter = new FieldsWriter(directory, segment, fieldInfos);

                try
                {
                    for (int i = 0; i < readers.Count; i++)
                    {
                        IndexReader   reader = (IndexReader)readers[i];
                        SegmentReader matchingSegmentReader = matchingSegmentReaders[i];
                        FieldsReader  matchingFieldsReader;
                        bool          hasMatchingReader;
                        if (matchingSegmentReader != null)
                        {
                            FieldsReader fieldsReader = matchingSegmentReader.GetFieldsReader();
                            if (fieldsReader != null && !fieldsReader.CanReadRawDocs())
                            {
                                matchingFieldsReader = null;
                                hasMatchingReader    = false;
                            }
                            else
                            {
                                matchingFieldsReader = fieldsReader;
                                hasMatchingReader    = true;
                            }
                        }
                        else
                        {
                            hasMatchingReader    = false;
                            matchingFieldsReader = null;
                        }
                        int  maxDoc       = reader.MaxDoc();
                        bool hasDeletions = reader.HasDeletions();
                        for (int j = 0; j < maxDoc;)
                        {
                            if (!hasDeletions || !reader.IsDeleted(j))
                            { // skip deleted docs
                                if (hasMatchingReader)
                                {
                                    // We can optimize this case (doing a bulk
                                    // byte copy) since the field numbers are
                                    // identical
                                    int start   = j;
                                    int numDocs = 0;
                                    do
                                    {
                                        j++;
                                        numDocs++;
                                        if (j >= maxDoc)
                                        {
                                            break;
                                        }
                                        if (hasDeletions && matchingSegmentReader.IsDeleted(j))
                                        {
                                            j++;
                                            break;
                                        }
                                    } while (numDocs < MAX_RAW_MERGE_DOCS);

                                    IndexInput stream = matchingFieldsReader.RawDocs(rawDocLengths, start, numDocs);
                                    fieldsWriter.AddRawDocuments(stream, rawDocLengths, numDocs);
                                    docCount += numDocs;
                                    if (checkAbort != null)
                                    {
                                        checkAbort.Work(300 * numDocs);
                                    }
                                }
                                else
                                {
                                    // NOTE: it's very important to first assign
                                    // to doc then pass it to
                                    // termVectorsWriter.addAllDocVectors; see
                                    // LUCENE-1282
                                    Document doc = reader.Document(j, fieldSelectorMerge);
                                    fieldsWriter.AddDocument(doc);
                                    j++;
                                    docCount++;
                                    if (checkAbort != null)
                                    {
                                        checkAbort.Work(300);
                                    }
                                }
                            }
                            else
                            {
                                j++;
                            }
                        }
                    }
                }
                finally
                {
                    fieldsWriter.Close();
                }

                long fdxFileLength = directory.FileLength(segment + "." + IndexFileNames.FIELDS_INDEX_EXTENSION);

                // {{dougsale-2.4.0}
                // this shouldn't be a problem for us - if it is,
                // then it's not a JRE bug...
                //if (4+docCount*8 != fdxFileLength)
                //  // This is most likely a bug in Sun JRE 1.6.0_04/_05;
                //  // we detect that the bug has struck, here, and
                //  // throw an exception to prevent the corruption from
                //  // entering the index.  See LUCENE-1282 for
                //  // details.
                //  throw new RuntimeException("mergeFields produced an invalid result: docCount is " + docCount + " but fdx file size is " + fdxFileLength + "; now aborting this merge to prevent index corruption");
            }
            else
            {
                // If we are skipping the doc stores, that means there
                // are no deletions in any of these segments, so we
                // just sum numDocs() of each segment to get total docCount
                for (int i = 0; i < readers.Count; i++)
                {
                    docCount += ((IndexReader)readers[i]).NumDocs();
                }
            }

            return(docCount);
        }
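
This MergeFields version only enables bulk copying for a reader when a matching SegmentReader exists and its FieldsReader reports CanReadRawDocs(); otherwise it falls back to per-document AddDocument copying. A compressed sketch of that eligibility decision, using an illustrative interface in place of FieldsReader:

// Decide whether raw (bulk) stored-field copying may be used for one reader.
public interface IRawDocsReader
{
    bool CanReadRawDocs();
}

public static class BulkCopyEligibility
{
    // Returns the reader to bulk-copy from, or null to fall back to
    // per-document copying, mirroring the hasMatchingReader checks above.
    public static IRawDocsReader ChooseBulkReader(IRawDocsReader matchingFieldsReader)
    {
        if (matchingFieldsReader == null)
            return null;
        return matchingFieldsReader.CanReadRawDocs() ? matchingFieldsReader : null;
    }
}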
Example No. 15
		/// <summary> </summary>
		/// <returns> The number of documents in all of the readers
		/// </returns>
		/// <throws>  CorruptIndexException if the index is corrupt </throws>
		/// <throws>  IOException if there is a low-level IO error </throws>
		private int MergeFields()
		{
			
			if (!mergeDocStores)
			{
				// When we are not merging by doc stores, that means
				// all segments were written as part of a single
				// autoCommit=false IndexWriter session, so their field
				// name -> number mapping are the same.  So, we start
				// with the fieldInfos of the last segment in this
				// case, to keep that numbering.
				SegmentReader sr = (SegmentReader) readers[readers.Count - 1];
				fieldInfos = (FieldInfos) sr.fieldInfos.Clone();
			}
			else
			{
				fieldInfos = new FieldInfos(); // merge field names
			}
			
			for (int i = 0; i < readers.Count; i++)
			{
				IndexReader reader = (IndexReader) readers[i];
				if (reader is SegmentReader)
				{
					SegmentReader segmentReader = (SegmentReader) reader;
					for (int j = 0; j < segmentReader.GetFieldInfos().Size(); j++)
					{
						FieldInfo fi = segmentReader.GetFieldInfos().FieldInfo(j);
						fieldInfos.Add(fi.name, fi.isIndexed, fi.storeTermVector, fi.storePositionWithTermVector, fi.storeOffsetWithTermVector, !reader.HasNorms(fi.name), fi.storePayloads);
					}
				}
				else
				{
					AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET), true, true, true, false);
					AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION), true, true, false, false);
					AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET), true, false, true, false);
					AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR), true, false, false, false);
					AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.STORES_PAYLOADS), false, false, false, true);
					AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.INDEXED), false, false, false, false);
					fieldInfos.Add(reader.GetFieldNames(IndexReader.FieldOption.UNINDEXED), false);
				}
			}
			fieldInfos.Write(directory, segment + ".fnm");
			
			int docCount = 0;
			
			if (mergeDocStores)
			{
				
				// If the i'th reader is a SegmentReader and has
				// identical fieldName -> number mapping, then this
				// array will be non-null at position i:
				SegmentReader[] matchingSegmentReaders = new SegmentReader[readers.Count];
				
				// If this reader is a SegmentReader, and all of its
				// field name -> number mappings match the "merged"
				// FieldInfos, then we can do a bulk copy of the
				// stored fields:
				for (int i = 0; i < readers.Count; i++)
				{
					IndexReader reader = (IndexReader) readers[i];
					if (reader is SegmentReader)
					{
						SegmentReader segmentReader = (SegmentReader) reader;
						bool same = true;
						FieldInfos segmentFieldInfos = segmentReader.GetFieldInfos();
						for (int j = 0; same && j < segmentFieldInfos.Size(); j++)
							same = fieldInfos.FieldName(j).Equals(segmentFieldInfos.FieldName(j));
						if (same)
						{
							matchingSegmentReaders[i] = segmentReader;
						}
					}
				}
				
				// Used for bulk-reading raw bytes for stored fields
				int[] rawDocLengths = new int[MAX_RAW_MERGE_DOCS];
				
				// for merging we don't want to compress/uncompress the data, so to tell the FieldsReader that we're
				// in  merge mode, we use this FieldSelector
				FieldSelector fieldSelectorMerge = new AnonymousClassFieldSelector(this);
				
				// merge field values
				FieldsWriter fieldsWriter = new FieldsWriter(directory, segment, fieldInfos);
				
				try
				{
					for (int i = 0; i < readers.Count; i++)
					{
						IndexReader reader = (IndexReader) readers[i];
						SegmentReader matchingSegmentReader = matchingSegmentReaders[i];
						FieldsReader matchingFieldsReader;
						if (matchingSegmentReader != null)
							matchingFieldsReader = matchingSegmentReader.GetFieldsReader();
						else
							matchingFieldsReader = null;
						int maxDoc = reader.MaxDoc();
						for (int j = 0; j < maxDoc; )
						{
							if (!reader.IsDeleted(j))
							{
								// skip deleted docs
								if (matchingSegmentReader != null)
								{
									// We can optimize this case (doing a bulk
									// byte copy) since the field numbers are
									// identical
									int start = j;
									int numDocs = 0;
									do 
									{
										j++;
										numDocs++;
									}
									while (j < maxDoc && !matchingSegmentReader.IsDeleted(j) && numDocs < MAX_RAW_MERGE_DOCS);
									
									IndexInput stream = matchingFieldsReader.RawDocs(rawDocLengths, start, numDocs);
									fieldsWriter.AddRawDocuments(stream, rawDocLengths, numDocs);
									docCount += numDocs;
									if (checkAbort != null)
										checkAbort.Work(300 * numDocs);
								}
								else
								{
									fieldsWriter.AddDocument(reader.Document(j, fieldSelectorMerge));
									j++;
									docCount++;
									if (checkAbort != null)
										checkAbort.Work(300);
								}
							}
							else
								j++;
						}
					}
				}
				finally
				{
					fieldsWriter.Close();
				}

                System.Diagnostics.Debug.Assert(docCount*8 == directory.FileLength(segment + "." + IndexFileNames.FIELDS_INDEX_EXTENSION),
                    "after MergeFields: fdx size mismatch: " + docCount + " docs vs " + 
                    directory.FileLength(segment + "." + IndexFileNames.FIELDS_INDEX_EXTENSION) +
                    " length in bytes of " + segment + "." + IndexFileNames.FIELDS_INDEX_EXTENSION); 
			}
			// If we are skipping the doc stores, that means there
			// are no deletions in any of these segments, so we
			// just sum numDocs() of each segment to get total docCount
			else
				for (int i = 0; i < readers.Count; i++)
					docCount += ((IndexReader) readers[i]).NumDocs();
			
			return docCount;
		}
Example No. 16
		private int CopyFieldsWithDeletions(FieldSelector fieldSelectorMerge, FieldsWriter fieldsWriter, IndexReader reader, FieldsReader matchingFieldsReader)
		{
			int docCount = 0;
			int maxDoc = reader.MaxDoc();
			if (matchingFieldsReader != null)
			{
				// We can bulk-copy because the fieldInfos are "congruent"
				for (int j = 0; j < maxDoc; )
				{
					if (reader.IsDeleted(j))
					{
						// skip deleted docs
						++j;
						continue;
					}
					// We can optimize this case (doing a bulk byte copy) since the field 
					// numbers are identical
					int start = j, numDocs = 0;
					do 
					{
						j++;
						numDocs++;
						if (j >= maxDoc)
							break;
						if (reader.IsDeleted(j))
						{
							j++;
							break;
						}
					}
					while (numDocs < MAX_RAW_MERGE_DOCS);
					
					IndexInput stream = matchingFieldsReader.RawDocs(rawDocLengths, start, numDocs);
					fieldsWriter.AddRawDocuments(stream, rawDocLengths, numDocs);
					docCount += numDocs;
					checkAbort.Work(300 * numDocs);
				}
			}
			else
			{
				for (int j = 0; j < maxDoc; j++)
				{
					if (reader.IsDeleted(j))
					{
						// skip deleted docs
						continue;
					}
					// NOTE: it's very important to first assign to doc then pass it to
					// termVectorsWriter.addAllDocVectors; see LUCENE-1282
					Document doc = reader.Document(j, fieldSelectorMerge);
					fieldsWriter.AddDocument(doc);
					docCount++;
					checkAbort.Work(300);
				}
			}
			return docCount;
		}
Example No. 17
		private int CopyFieldsNoDeletions(FieldSelector fieldSelectorMerge, FieldsWriter fieldsWriter, IndexReader reader, FieldsReader matchingFieldsReader)
		{
			int maxDoc = reader.MaxDoc();
			int docCount = 0;
			if (matchingFieldsReader != null)
			{
				// We can bulk-copy because the fieldInfos are "congruent"
				while (docCount < maxDoc)
				{
					int len = System.Math.Min(MAX_RAW_MERGE_DOCS, maxDoc - docCount);
					IndexInput stream = matchingFieldsReader.RawDocs(rawDocLengths, docCount, len);
					fieldsWriter.AddRawDocuments(stream, rawDocLengths, len);
					docCount += len;
					checkAbort.Work(300 * len);
				}
			}
			else
			{
				for (; docCount < maxDoc; docCount++)
				{
					// NOTE: it's very important to first assign to doc then pass it to
					// termVectorsWriter.addAllDocVectors; see LUCENE-1282
					Document doc = reader.Document(docCount, fieldSelectorMerge);
					fieldsWriter.AddDocument(doc);
					checkAbort.Work(300);
				}
			}
			return docCount;
		}
Example No. 18
        /// <summary> </summary>
        /// <returns> The number of documents in all of the readers
        /// </returns>
        /// <throws>  CorruptIndexException if the index is corrupt </throws>
        /// <throws>  IOException if there is a low-level IO error </throws>
        private int MergeFields()
        {
            if (!mergeDocStores)
            {
                // When we are not merging by doc stores, that means
                // all segments were written as part of a single
                // autoCommit=false IndexWriter session, so their field
                // name -> number mapping are the same.  So, we start
                // with the fieldInfos of the last segment in this
                // case, to keep that numbering.
                SegmentReader sr = (SegmentReader) readers[readers.Count - 1];
                fieldInfos = (FieldInfos) sr.fieldInfos.Clone();
            }
            else
            {
                fieldInfos = new FieldInfos(); // merge field names
            }

            for (int i = 0; i < readers.Count; i++)
            {
                IndexReader reader = (IndexReader) readers[i];
                if (reader is SegmentReader)
                {
                    SegmentReader segmentReader = (SegmentReader) reader;
                    for (int j = 0; j < segmentReader.GetFieldInfos().Size(); j++)
                    {
                        FieldInfo fi = segmentReader.GetFieldInfos().FieldInfo(j);
                        fieldInfos.Add(fi.name, fi.isIndexed, fi.storeTermVector, fi.storePositionWithTermVector, fi.storeOffsetWithTermVector, !reader.HasNorms(fi.name), fi.storePayloads, fi.omitTf);
                    }
                }
                else
                {
                    AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET), true, true, true, false, false);
                    AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION), true, true, false, false, false);
                    AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET), true, false, true, false, false);
                    AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR), true, false, false, false, false);
                    AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.OMIT_TF), false, false, false, false, true);
                    AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.STORES_PAYLOADS), false, false, false, true, false);
                    AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.INDEXED), false, false, false, false, false);
                    fieldInfos.Add(reader.GetFieldNames(IndexReader.FieldOption.UNINDEXED), false);
                }
            }
            fieldInfos.Write(directory, segment + ".fnm");

            int docCount = 0;

            SetMatchingSegmentReaders();

            if (mergeDocStores)
            {
                // for merging we don't want to compress/uncompress the data, so to tell the FieldsReader that we're
                // in  merge mode, we use this FieldSelector
                FieldSelector fieldSelectorMerge = new AnonymousClassFieldSelector(this);

                // merge field values
                FieldsWriter fieldsWriter = new FieldsWriter(directory, segment, fieldInfos);

                try
                {
                    for (int i = 0; i < readers.Count; i++)
                    {
                        IndexReader reader = (IndexReader)readers[i];
                        SegmentReader matchingSegmentReader = matchingSegmentReaders[i];
                        FieldsReader matchingFieldsReader;
                        bool hasMatchingReader;
                        if (matchingSegmentReader != null)
                        {
                            FieldsReader fieldsReader = matchingSegmentReader.GetFieldsReader();
                            if (fieldsReader != null && !fieldsReader.CanReadRawDocs())
                            {
                                matchingFieldsReader = null;
                                hasMatchingReader = false;
                            }
                            else
                            {
                                matchingFieldsReader = fieldsReader;
                                hasMatchingReader = true;
                            }
                        }
                        else
                        {
                            hasMatchingReader = false;
                            matchingFieldsReader = null;
                        }
                        int maxDoc = reader.MaxDoc();
                        bool hasDeletions = reader.HasDeletions();
                        for (int j = 0; j < maxDoc; )
                        {
                            if (!hasDeletions || !reader.IsDeleted(j))
                            { // skip deleted docs
                                if (hasMatchingReader)
                                {
                                    // We can optimize this case (doing a bulk
                                    // byte copy) since the field numbers are
                                    // identical
                                    int start = j;
                                    int numDocs = 0;
                                    do
                                    {
                                        j++;
                                        numDocs++;
                                        if (j >= maxDoc)
                                            break;
                                        if (hasDeletions && matchingSegmentReader.IsDeleted(j))
                                        {
                                            j++;
                                            break;
                                        }
                                    } while (numDocs < MAX_RAW_MERGE_DOCS);

                                    IndexInput stream = matchingFieldsReader.RawDocs(rawDocLengths, start, numDocs);
                                    fieldsWriter.AddRawDocuments(stream, rawDocLengths, numDocs);
                                    docCount += numDocs;
                                    if (checkAbort != null)
                                        checkAbort.Work(300 * numDocs);
                                }
                                else
                                {
                                    // NOTE: it's very important to first assign
                                    // to doc then pass it to
                                    // termVectorsWriter.addAllDocVectors; see
                                    // LUCENE-1282
                                    Document doc = reader.Document(j, fieldSelectorMerge);
                                    fieldsWriter.AddDocument(doc);
                                    j++;
                                    docCount++;
                                    if (checkAbort != null)
                                        checkAbort.Work(300);
                                }
                            }
                            else
                                j++;
                        }
                    }
                }
                finally
                {
                    fieldsWriter.Close();
                }

                long fdxFileLength = directory.FileLength(segment + "." + IndexFileNames.FIELDS_INDEX_EXTENSION);

                // {{dougsale-2.4.0}
                // this shouldn't be a problem for us - if it is,
                // then it's not a JRE bug...
                //if (4+docCount*8 != fdxFileLength)
                //  // This is most likely a bug in Sun JRE 1.6.0_04/_05;
                //  // we detect that the bug has struck, here, and
                //  // throw an exception to prevent the corruption from
                //  // entering the index.  See LUCENE-1282 for
                //  // details.
                //  throw new RuntimeException("mergeFields produced an invalid result: docCount is " + docCount + " but fdx file size is " + fdxFileLength + "; now aborting this merge to prevent index corruption");

            }
            else
                // If we are skipping the doc stores, that means there
                // are no deletions in any of these segments, so we
                // just sum numDocs() of each segment to get total docCount
                for (int i = 0; i < readers.Count; i++)
                    docCount += ((IndexReader)readers[i]).NumDocs();

            return docCount;
        }
Example No. 19
		/// <summary> </summary>
		/// <returns> The number of documents in all of the readers
		/// </returns>
		/// <throws>  CorruptIndexException if the index is corrupt </throws>
		/// <throws>  IOException if there is a low-level IO error </throws>
		private int MergeFields()
		{
			
			if (!mergeDocStores)
			{
				// When we are not merging by doc stores, that means
				// all segments were written as part of a single
				// autoCommit=false IndexWriter session, so their field
				// name -> number mapping are the same.  So, we start
				// with the fieldInfos of the last segment in this
				// case, to keep that numbering.
				SegmentReader sr = (SegmentReader) readers[readers.Count - 1];
				fieldInfos = (FieldInfos) sr.core.fieldInfos.Clone();
			}
			else
			{
				fieldInfos = new FieldInfos(); // merge field names
			}
			
			for (System.Collections.IEnumerator iter = readers.GetEnumerator(); iter.MoveNext(); )
			{
				IndexReader reader = (IndexReader) iter.Current;
				if (reader is SegmentReader)
				{
					SegmentReader segmentReader = (SegmentReader) reader;
					FieldInfos readerFieldInfos = segmentReader.FieldInfos();
					int numReaderFieldInfos = readerFieldInfos.Size();
					for (int j = 0; j < numReaderFieldInfos; j++)
					{
						FieldInfo fi = readerFieldInfos.FieldInfo(j);
						fieldInfos.Add(fi.name, fi.isIndexed, fi.storeTermVector, fi.storePositionWithTermVector, fi.storeOffsetWithTermVector, !reader.HasNorms(fi.name), fi.storePayloads, fi.omitTermFreqAndPositions);
					}
				}
				else
				{
					AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.TERMVECTOR_WITH_POSITION_OFFSET), true, true, true, false, false);
					AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.TERMVECTOR_WITH_POSITION), true, true, false, false, false);
					AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.TERMVECTOR_WITH_OFFSET), true, false, true, false, false);
					AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.TERMVECTOR), true, false, false, false, false);
					AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.OMIT_TERM_FREQ_AND_POSITIONS), false, false, false, false, true);
					AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.STORES_PAYLOADS), false, false, false, true, false);
					AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.INDEXED), false, false, false, false, false);
					fieldInfos.Add(reader.GetFieldNames(FieldOption.UNINDEXED), false);
				}
			}
			fieldInfos.Write(directory, segment + ".fnm");
			
			int docCount = 0;
			
			SetMatchingSegmentReaders();
			
			if (mergeDocStores)
			{
				
				// for merging we don't want to compress/uncompress the data, so to tell the FieldsReader that we're
				// in  merge mode, we use this FieldSelector
				FieldSelector fieldSelectorMerge = new AnonymousClassFieldSelector(this);
				
				// merge field values
				FieldsWriter fieldsWriter = new FieldsWriter(directory, segment, fieldInfos);
				
				try
				{
					int idx = 0;
					for (System.Collections.IEnumerator iter = readers.GetEnumerator(); iter.MoveNext(); )
					{
						IndexReader reader = (IndexReader) iter.Current;
						SegmentReader matchingSegmentReader = matchingSegmentReaders[idx++];
						FieldsReader matchingFieldsReader = null;
						if (matchingSegmentReader != null)
						{
							FieldsReader fieldsReader = matchingSegmentReader.GetFieldsReader();
							if (fieldsReader != null && fieldsReader.CanReadRawDocs())
							{
								matchingFieldsReader = fieldsReader;
							}
						}
						if (reader.HasDeletions())
						{
							docCount += CopyFieldsWithDeletions(fieldSelectorMerge, fieldsWriter, reader, matchingFieldsReader);
						}
						else
						{
							docCount += CopyFieldsNoDeletions(fieldSelectorMerge, fieldsWriter, reader, matchingFieldsReader);
						}
					}
				}
				finally
				{
					fieldsWriter.Close();
				}
				
				System.String fileName = segment + "." + IndexFileNames.FIELDS_INDEX_EXTENSION;
				long fdxFileLength = directory.FileLength(fileName);
				
				if (4 + ((long) docCount) * 8 != fdxFileLength)
				// This is most likely a bug in Sun JRE 1.6.0_04/_05;
				// we detect that the bug has struck, here, and
				// throw an exception to prevent the corruption from
				// entering the index.  See LUCENE-1282 for
				// details.
					throw new System.SystemException("mergeFields produced an invalid result: docCount is " + docCount + " but fdx file size is " + fdxFileLength + " file=" + fileName + " file exists?=" + directory.FileExists(fileName) + "; now aborting this merge to prevent index corruption");
			}
			// If we are skipping the doc stores, that means there
			// are no deletions in any of these segments, so we
			// just sum numDocs() of each segment to get total docCount
			else
			{
				for (System.Collections.IEnumerator iter = readers.GetEnumerator(); iter.MoveNext(); )
				{
					docCount += ((IndexReader) iter.Current).NumDocs();
				}
			}
			
			return docCount;
		}
Example No. 20
        /// <summary> </summary>
        /// <returns> The number of documents in all of the readers
        /// </returns>
        /// <throws>  CorruptIndexException if the index is corrupt </throws>
        /// <throws>  IOException if there is a low-level IO error </throws>
        private int MergeFields()
        {
            if (!mergeDocStores)
            {
                // When we are not merging by doc stores, their field
                // name -> number mapping are the same.  So, we start
                // with the fieldInfos of the last segment in this
                // case, to keep that numbering.
                SegmentReader sr = (SegmentReader)readers[readers.Count - 1];
                fieldInfos = (FieldInfos)sr.core.fieldInfos.Clone();
            }
            else
            {
                fieldInfos = new FieldInfos(); // merge field names
            }

            foreach (IndexReader reader in readers)
            {
                if (reader is SegmentReader)
                {
                    SegmentReader segmentReader       = (SegmentReader)reader;
                    FieldInfos    readerFieldInfos    = segmentReader.FieldInfos();
                    int           numReaderFieldInfos = readerFieldInfos.Size();
                    for (int j = 0; j < numReaderFieldInfos; j++)
                    {
                        FieldInfo fi = readerFieldInfos.FieldInfo(j);
                        fieldInfos.Add(fi.name, fi.isIndexed, fi.storeTermVector, fi.storePositionWithTermVector, fi.storeOffsetWithTermVector, !reader.HasNorms(fi.name), fi.storePayloads, fi.omitTermFreqAndPositions);
                    }
                }
                else
                {
                    AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.TERMVECTOR_WITH_POSITION_OFFSET), true, true, true, false, false);
                    AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.TERMVECTOR_WITH_POSITION), true, true, false, false, false);
                    AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.TERMVECTOR_WITH_OFFSET), true, false, true, false, false);
                    AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.TERMVECTOR), true, false, false, false, false);
                    AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.OMIT_TERM_FREQ_AND_POSITIONS), false, false, false, false, true);
                    AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.STORES_PAYLOADS), false, false, false, true, false);
                    AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.INDEXED), false, false, false, false, false);
                    fieldInfos.Add(reader.GetFieldNames(FieldOption.UNINDEXED), false);
                }
            }
            fieldInfos.Write(directory, segment + ".fnm");

            int docCount = 0;

            SetMatchingSegmentReaders();

            if (mergeDocStores)
            {
                // merge field values
                FieldsWriter fieldsWriter = new FieldsWriter(directory, segment, fieldInfos);

                try
                {
                    int idx = 0;
                    foreach (IndexReader reader in readers)
                    {
                        SegmentReader matchingSegmentReader = matchingSegmentReaders[idx++];
                        FieldsReader  matchingFieldsReader  = null;
                        if (matchingSegmentReader != null)
                        {
                            FieldsReader fieldsReader = matchingSegmentReader.GetFieldsReader();
                            if (fieldsReader != null && fieldsReader.CanReadRawDocs())
                            {
                                matchingFieldsReader = fieldsReader;
                            }
                        }
                        if (reader.HasDeletions)
                        {
                            docCount += CopyFieldsWithDeletions(fieldsWriter, reader, matchingFieldsReader);
                        }
                        else
                        {
                            docCount += CopyFieldsNoDeletions(fieldsWriter, reader, matchingFieldsReader);
                        }
                    }
                }
                finally
                {
                    fieldsWriter.Dispose();
                }

                System.String fileName      = segment + "." + IndexFileNames.FIELDS_INDEX_EXTENSION;
                long          fdxFileLength = directory.FileLength(fileName);

                if (4 + ((long)docCount) * 8 != fdxFileLength)
                {
                    // This is most likely a bug in Sun JRE 1.6.0_04/_05;
                    // we detect that the bug has struck, here, and
                    // throw an exception to prevent the corruption from
                    // entering the index.  See LUCENE-1282 for
                    // details.
                    throw new System.SystemException("mergeFields produced an invalid result: docCount is " + docCount + " but fdx file size is " + fdxFileLength + " file=" + fileName + " file exists?=" + directory.FileExists(fileName) + "; now aborting this merge to prevent index corruption");
                }
            }
            // If we are skipping the doc stores, that means there
            // are no deletions in any of these segments, so we
            // just sum numDocs() of each segment to get total docCount
            else
            {
                foreach (IndexReader reader in readers)
                {
                    docCount += reader.NumDocs();
                }
            }

            return(docCount);
        }
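
In the later MergeFields versions the per-reader copy is dispatched to CopyFieldsWithDeletions or CopyFieldsNoDeletions depending on whether the reader has deletions, and the returned doc counts are summed. A minimal sketch of that dispatch, with delegates standing in for the two copy methods (assumed shape, not the actual API):

// Sum the copied-document counts, choosing the copy path per reader.
public static class MergeDispatch
{
    public static int CopyAllFields(
        bool[] readerHasDeletions,
        System.Func<int, int> copyWithDeletions,   // takes the reader index, returns docs copied
        System.Func<int, int> copyNoDeletions)
    {
        int docCount = 0;
        for (int i = 0; i < readerHasDeletions.Length; i++)
        {
            docCount += readerHasDeletions[i] ? copyWithDeletions(i) : copyNoDeletions(i);
        }
        return docCount;
    }
}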
Example No. 21
			/// <summary>Initializes shared state for this new document </summary>
			internal void  Init(Document doc, int docID)
			{

                System.Diagnostics.Debug.Assert(!isIdle);
                System.Diagnostics.Debug.Assert(Enclosing_Instance.writer.TestPoint("DocumentsWriter.ThreadState.init start"));
				
				this.docID = docID;
				docBoost = doc.GetBoost();
				numStoredFields = 0;
				numFieldData = 0;
				numVectorFields = 0;
				maxTermPrefix = null;
				
				System.Diagnostics.Debug.Assert(0 == fdtLocal.Length());
				System.Diagnostics.Debug.Assert(0 == fdtLocal.GetFilePointer());
				System.Diagnostics.Debug.Assert(0 == tvfLocal.Length());
				System.Diagnostics.Debug.Assert(0 == tvfLocal.GetFilePointer());
				int thisFieldGen = fieldGen++;
				
				System.Collections.IList docFields = doc.GetFields();
				int numDocFields = docFields.Count;
				bool docHasVectors = false;
				
				// Absorb any new fields first seen in this document.
				// Also absorb any changes to fields we had already
				// seen before (eg suddenly turning on norms or
				// vectors, etc.):
				
				for (int i = 0; i < numDocFields; i++)
				{
					Fieldable field = (Fieldable) docFields[i];
					
					FieldInfo fi = Enclosing_Instance.fieldInfos.Add(field.Name(), field.IsIndexed(), field.IsTermVectorStored(), field.IsStorePositionWithTermVector(), field.IsStoreOffsetWithTermVector(), field.GetOmitNorms(), false);
					if (fi.isIndexed && !fi.omitNorms)
					{
						// Maybe grow our buffered norms
						if (Enclosing_Instance.norms.Length <= fi.number)
						{
							int newSize = (int) ((1 + fi.number) * 1.25);
							BufferedNorms[] newNorms = new BufferedNorms[newSize];
							Array.Copy(Enclosing_Instance.norms, 0, newNorms, 0, Enclosing_Instance.norms.Length);
							Enclosing_Instance.norms = newNorms;
						}
						
						if (Enclosing_Instance.norms[fi.number] == null)
							Enclosing_Instance.norms[fi.number] = new BufferedNorms();
						
						Enclosing_Instance.hasNorms = true;
					}
					
					// Make sure we have a FieldData allocated
					int hashPos = fi.name.GetHashCode() & fieldDataHashMask;
					FieldData fp = fieldDataHash[hashPos];
					while (fp != null && !fp.fieldInfo.name.Equals(fi.name))
						fp = fp.next;
					
					if (fp == null)
					{
						
						fp = new FieldData(this, fi);
						fp.next = fieldDataHash[hashPos];
						fieldDataHash[hashPos] = fp;
						
						if (numAllFieldData == allFieldDataArray.Length)
						{
							int newSize = (int) (allFieldDataArray.Length * 1.5);
							int newHashSize = fieldDataHash.Length * 2;
							
							FieldData[] newArray = new FieldData[newSize];
							FieldData[] newHashArray = new FieldData[newHashSize];
							Array.Copy(allFieldDataArray, 0, newArray, 0, numAllFieldData);
							
							// Rehash
							fieldDataHashMask = newSize - 1;
							for (int j = 0; j < fieldDataHash.Length; j++)
							{
								FieldData fp0 = fieldDataHash[j];
								while (fp0 != null)
								{
									hashPos = fp0.fieldInfo.name.GetHashCode() & fieldDataHashMask;
									FieldData nextFP0 = fp0.next;
									fp0.next = newHashArray[hashPos];
									newHashArray[hashPos] = fp0;
									fp0 = nextFP0;
								}
							}
							
							allFieldDataArray = newArray;
							fieldDataHash = newHashArray;
						}
						allFieldDataArray[numAllFieldData++] = fp;
					}
					else
					{
						System.Diagnostics.Debug.Assert(fp.fieldInfo == fi);
					}
					
					if (thisFieldGen != fp.lastGen)
					{
						
						// First time we're seeing this field for this doc
						fp.lastGen = thisFieldGen;
						fp.fieldCount = 0;
						fp.doVectors = fp.doVectorPositions = fp.doVectorOffsets = false;
						fp.doNorms = fi.isIndexed && !fi.omitNorms;
						
						if (numFieldData == fieldDataArray.Length)
						{
							int newSize = fieldDataArray.Length * 2;
							FieldData[] newArray = new FieldData[newSize];
							Array.Copy(fieldDataArray, 0, newArray, 0, numFieldData);
							fieldDataArray = newArray;
						}
						fieldDataArray[numFieldData++] = fp;
					}
					
					if (field.IsTermVectorStored())
					{
						if (!fp.doVectors && numVectorFields++ == vectorFieldPointers.Length)
						{
							int newSize = (int) (numVectorFields * 1.5);
							vectorFieldPointers = new long[newSize];
							vectorFieldNumbers = new int[newSize];
						}
						fp.doVectors = true;
						docHasVectors = true;
						
						fp.doVectorPositions |= field.IsStorePositionWithTermVector();
						fp.doVectorOffsets |= field.IsStoreOffsetWithTermVector();
					}
					
					if (fp.fieldCount == fp.docFields.Length)
					{
						Fieldable[] newArray = new Fieldable[fp.docFields.Length * 2];
						Array.Copy(fp.docFields, 0, newArray, 0, fp.docFields.Length);
						fp.docFields = newArray;
					}
					
					// Lazily allocate arrays for postings:
					if (field.IsIndexed() && fp.postingsHash == null)
						fp.InitPostingArrays();
					
					fp.docFields[fp.fieldCount++] = field;
				}
				
				// Maybe init the local & global fieldsWriter
				if (localFieldsWriter == null)
				{
					if (Enclosing_Instance.fieldsWriter == null)
					{
						System.Diagnostics.Debug.Assert(Enclosing_Instance.docStoreSegment == null);
						System.Diagnostics.Debug.Assert(Enclosing_Instance.segment != null);
						Enclosing_Instance.docStoreSegment = Enclosing_Instance.segment;
						// If we hit an exception while init'ing the
						// fieldsWriter, we must abort this segment
						// because those files will be in an unknown
						// state:
						try
						{
							Enclosing_Instance.fieldsWriter = new FieldsWriter(Enclosing_Instance.directory, Enclosing_Instance.docStoreSegment, Enclosing_Instance.fieldInfos);
						}
						catch (System.Exception t)
						{
							throw new AbortException(t, Enclosing_Instance);
						}
						Enclosing_Instance.files = null;
					}
					localFieldsWriter = new FieldsWriter(null, fdtLocal, Enclosing_Instance.fieldInfos);
				}
				
				// First time we see a doc that has field(s) with
				// stored vectors, we init our tvx writer
				if (docHasVectors)
				{
					if (Enclosing_Instance.tvx == null)
					{
						System.Diagnostics.Debug.Assert(Enclosing_Instance.docStoreSegment != null);
						// If we hit an exception while init'ing the term
						// vector output files, we must abort this segment
						// because those files will be in an unknown
						// state:
						try
						{
							Enclosing_Instance.tvx = Enclosing_Instance.directory.CreateOutput(Enclosing_Instance.docStoreSegment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION);
							Enclosing_Instance.tvx.WriteInt(TermVectorsReader.FORMAT_VERSION);
							Enclosing_Instance.tvd = Enclosing_Instance.directory.CreateOutput(Enclosing_Instance.docStoreSegment + "." + IndexFileNames.VECTORS_DOCUMENTS_EXTENSION);
							Enclosing_Instance.tvd.WriteInt(TermVectorsReader.FORMAT_VERSION);
							Enclosing_Instance.tvf = Enclosing_Instance.directory.CreateOutput(Enclosing_Instance.docStoreSegment + "." + IndexFileNames.VECTORS_FIELDS_EXTENSION);
							Enclosing_Instance.tvf.WriteInt(TermVectorsReader.FORMAT_VERSION);
							
							// We must "catch up" for all docs before us
							// that had no vectors:
							for (int i = 0; i < Enclosing_Instance.numDocsInStore; i++)
							{
								Enclosing_Instance.tvx.WriteLong(Enclosing_Instance.tvd.GetFilePointer());
								Enclosing_Instance.tvd.WriteVInt(0);
							}
						}
						catch (System.Exception t)
						{
							throw new AbortException(t, Enclosing_Instance);
						}
						Enclosing_Instance.files = null;
					}
					
					numVectorFields = 0;
				}
			}
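The field lookup in Init above is an open hash table with chaining: the bucket is chosen by masking the field name's hash code with a power-of-two mask, and growth relinks every entry under the new mask. A stripped-down sketch of that pattern (the FieldTable/Entry names are illustrative, not Lucene's):

// Illustrative sketch of a chained, mask-based hash like the FieldData lookup above.
internal class FieldTable
{
    private class Entry
    {
        internal string name;
        internal Entry next;
        internal Entry(string name) { this.name = name; }
    }

    private Entry[] buckets = new Entry[16]; // kept a power of two
    private int mask = 15;
    private int count;

    public void Add(string name)
    {
        int pos = name.GetHashCode() & mask;
        Entry e = new Entry(name);
        e.next = buckets[pos];
        buckets[pos] = e;

        if (++count > buckets.Length)
        {
            // Double the bucket array and rehash every chained entry under the new mask
            Entry[] newBuckets = new Entry[buckets.Length * 2];
            mask = newBuckets.Length - 1;
            foreach (Entry head in buckets)
            {
                Entry cur = head;
                while (cur != null)
                {
                    Entry next = cur.next;
                    int newPos = cur.name.GetHashCode() & mask;
                    cur.next = newBuckets[newPos];
                    newBuckets[newPos] = cur;
                    cur = next;
                }
            }
            buckets = newBuckets;
        }
    }
}

One detail worth noting in the snippet above: the new mask is derived from the 1.5x-grown allFieldDataArray length rather than from the doubled bucket array. Lookups stay consistent because inserts and lookups share the same mask, but keeping the mask equal to bucketCount - 1 with a power-of-two bucket count, as in the sketch, uses the whole table.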
Example #22
0
		/// <summary>Called if we hit an exception when adding docs,
		/// flushing, etc.  This resets our state, discarding any
		/// docs added since last flush.  If ae is non-null, it
		/// contains the root cause exception (which we re-throw
		/// after we are done aborting). 
		/// </summary>
		internal void  Abort(AbortException ae)
		{
			lock (this)
			{
				
				// Anywhere that throws an AbortException must first
				// mark aborting to make sure while the exception is
				// unwinding the un-synchronized stack, no thread grabs
				// the corrupt ThreadState that hit the aborting
				// exception:
				System.Diagnostics.Debug.Assert(ae == null || abortCount > 0);
				
				try
				{
					
					if (infoStream != null)
						infoStream.WriteLine("docWriter: now abort");
					
					// Forcefully remove waiting ThreadStates from line
					for (int i = 0; i < numWaiting; i++)
						waitingThreadStates[i].isIdle = true;
					numWaiting = 0;
					
					// Wait for all other threads to finish with DocumentsWriter:
					PauseAllThreads();
					
					System.Diagnostics.Debug.Assert(0 == numWaiting);
					
					try
					{
						
						bufferedDeleteTerms.Clear();
						bufferedDeleteDocIDs.Clear();
						numBufferedDeleteTerms = 0;
						
						try
						{
							abortedFiles = Files();
						}
						catch (System.Exception)
						{
							abortedFiles = null;
						}
						
						docStoreSegment = null;
						numDocsInStore = 0;
						docStoreOffset = 0;
						files = null;
						
						// Clear vectors & fields from ThreadStates
						for (int i = 0; i < threadStates.Length; i++)
						{
							ThreadState state = threadStates[i];
							state.tvfLocal.Reset();
							state.fdtLocal.Reset();
							if (state.localFieldsWriter != null)
							{
								try
								{
									state.localFieldsWriter.Close();
								}
								catch (System.Exception)
								{
								}
								state.localFieldsWriter = null;
							}
						}
						
						// Reset vectors writer
						if (tvx != null)
						{
							try
							{
								tvx.Close();
							}
							catch (System.Exception)
							{
							}
							tvx = null;
						}
						if (tvd != null)
						{
							try
							{
								tvd.Close();
							}
							catch (System.Exception)
							{
							}
							tvd = null;
						}
						if (tvf != null)
						{
							try
							{
								tvf.Close();
							}
							catch (System.Exception)
							{
							}
							tvf = null;
						}
						
						// Reset fields writer
						if (fieldsWriter != null)
						{
							try
							{
								fieldsWriter.Close();
							}
							catch (System.Exception)
							{
							}
							fieldsWriter = null;
						}
						
						// Discard pending norms:
						int numField = fieldInfos.Size();
						for (int i = 0; i < numField; i++)
						{
							FieldInfo fi = fieldInfos.FieldInfo(i);
							if (fi.isIndexed && !fi.omitNorms)
							{
								BufferedNorms n = norms[i];
								if (n != null)
									try
									{
										n.Reset();
									}
									catch (System.Exception)
									{
									}
							}
						}
						
						// Reset all postings data
						ResetPostingsData();
					}
					finally
					{
						ResumeAllThreads();
					}
					
					// If we have a root cause exception, re-throw it now:
					if (ae != null)
					{
						System.Exception t = ae.InnerException;
						if (t is System.IO.IOException)
							throw (System.IO.IOException) t;
						else if (t is System.SystemException)
							throw (System.SystemException) t;
						else if (t is System.ApplicationException)
							throw (System.ApplicationException) t;
						else
							// Should not get here
							System.Diagnostics.Debug.Assert(false, "unknown exception: " + t);
					}
				}
				finally
				{
					if (ae != null)
						abortCount--;
					System.Threading.Monitor.PulseAll(this);
				}
			}
		}
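For context, callers of Abort follow the convention described in its summary: any failure that may leave buffered state corrupt is wrapped in an AbortException, handed to Abort for cleanup, and the root cause is then rethrown with its concrete type. A hedged sketch of the calling side; the method name is illustrative:

// Sketch: how a document-adding path is expected to hand failures to Abort.
internal void AddDocumentGuarded(Document doc, Analyzer analyzer)
{
    try
    {
        // ... invert the document and buffer its postings / stored fields ...
    }
    catch (AbortException ae)
    {
        // Doc store / postings state is now unknown: discard everything
        // buffered since the last flush, then let Abort rethrow the wrapped
        // IOException/SystemException root cause.
        Abort(ae);
    }
}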
		public StoredFieldsWriterPerThread(DocumentsWriter.DocState docState, StoredFieldsWriter storedFieldsWriter)
		{
			this.storedFieldsWriter = storedFieldsWriter;
			this.docState = docState;
			localFieldsWriter = new FieldsWriter((IndexOutput) null, (IndexOutput) null, storedFieldsWriter.fieldInfos);
		}
Example #24
0
        /// <summary> </summary>
        /// <returns> The number of documents in all of the readers
        /// </returns>
        /// <throws>  CorruptIndexException if the index is corrupt </throws>
        /// <throws>  IOException if there is a low-level IO error </throws>
        private int MergeFields()
        {
            if (!mergeDocStores)
            {
                // When we are not merging by doc stores, that means
                // all segments were written as part of a single
                // autoCommit=false IndexWriter session, so their field
                // name -> number mapping are the same.  So, we start
                // with the fieldInfos of the last segment in this
                // case, to keep that numbering.
                SegmentReader sr = (SegmentReader)readers[readers.Count - 1];
                fieldInfos = (FieldInfos)sr.fieldInfos.Clone();
            }
            else
            {
                fieldInfos = new FieldInfos();                 // merge field names
            }

            for (int i = 0; i < readers.Count; i++)
            {
                IndexReader reader = (IndexReader)readers[i];
                if (reader is SegmentReader)
                {
                    SegmentReader segmentReader = (SegmentReader)reader;
                    for (int j = 0; j < segmentReader.GetFieldInfos().Size(); j++)
                    {
                        FieldInfo fi = segmentReader.GetFieldInfos().FieldInfo(j);
                        fieldInfos.Add(fi.name, fi.isIndexed, fi.storeTermVector, fi.storePositionWithTermVector, fi.storeOffsetWithTermVector, !reader.HasNorms(fi.name), fi.storePayloads);
                    }
                }
                else
                {
                    AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET), true, true, true, false);
                    AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION), true, true, false, false);
                    AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET), true, false, true, false);
                    AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR), true, false, false, false);
                    AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.STORES_PAYLOADS), false, false, false, true);
                    AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.INDEXED), false, false, false, false);
                    fieldInfos.Add(reader.GetFieldNames(IndexReader.FieldOption.UNINDEXED), false);
                }
            }
            fieldInfos.Write(directory, segment + ".fnm");

            int docCount = 0;

            if (mergeDocStores)
            {
                // If the i'th reader is a SegmentReader and has
                // identical fieldName -> number mapping, then this
                // array will be non-null at position i:
                SegmentReader[] matchingSegmentReaders = new SegmentReader[readers.Count];

                // If this reader is a SegmentReader, and all of its
                // field name -> number mappings match the "merged"
                // FieldInfos, then we can do a bulk copy of the
                // stored fields:
                for (int i = 0; i < readers.Count; i++)
                {
                    IndexReader reader = (IndexReader)readers[i];
                    if (reader is SegmentReader)
                    {
                        SegmentReader segmentReader     = (SegmentReader)reader;
                        bool          same              = true;
                        FieldInfos    segmentFieldInfos = segmentReader.GetFieldInfos();
                        for (int j = 0; same && j < segmentFieldInfos.Size(); j++)
                        {
                            same = fieldInfos.FieldName(j).Equals(segmentFieldInfos.FieldName(j));
                        }
                        if (same)
                        {
                            matchingSegmentReaders[i] = segmentReader;
                        }
                    }
                }

                // Used for bulk-reading raw bytes for stored fields
                int[] rawDocLengths = new int[MAX_RAW_MERGE_DOCS];

                // for merging we don't want to compress/uncompress the data, so to tell the FieldsReader that we're
                // in  merge mode, we use this FieldSelector
                FieldSelector fieldSelectorMerge = new AnonymousClassFieldSelector(this);

                // merge field values
                FieldsWriter fieldsWriter = new FieldsWriter(directory, segment, fieldInfos);

                try
                {
                    for (int i = 0; i < readers.Count; i++)
                    {
                        IndexReader   reader = (IndexReader)readers[i];
                        SegmentReader matchingSegmentReader = matchingSegmentReaders[i];
                        FieldsReader  matchingFieldsReader;
                        if (matchingSegmentReader != null)
                        {
                            matchingFieldsReader = matchingSegmentReader.GetFieldsReader();
                        }
                        else
                        {
                            matchingFieldsReader = null;
                        }
                        int maxDoc = reader.MaxDoc();
                        for (int j = 0; j < maxDoc;)
                        {
                            if (!reader.IsDeleted(j))
                            {
                                // skip deleted docs
                                if (matchingSegmentReader != null)
                                {
                                    // We can optimize this case (doing a bulk
                                    // byte copy) since the field numbers are
                                    // identical
                                    int start   = j;
                                    int numDocs = 0;
                                    do
                                    {
                                        j++;
                                        numDocs++;
                                    }while (j < maxDoc && !matchingSegmentReader.IsDeleted(j) && numDocs < MAX_RAW_MERGE_DOCS);

                                    IndexInput stream = matchingFieldsReader.RawDocs(rawDocLengths, start, numDocs);
                                    fieldsWriter.AddRawDocuments(stream, rawDocLengths, numDocs);
                                    docCount += numDocs;
                                    if (checkAbort != null)
                                    {
                                        checkAbort.Work(300 * numDocs);
                                    }
                                }
                                else
                                {
                                    fieldsWriter.AddDocument(reader.Document(j, fieldSelectorMerge));
                                    j++;
                                    docCount++;
                                    if (checkAbort != null)
                                    {
                                        checkAbort.Work(300);
                                    }
                                }
                            }
                            else
                            {
                                j++;
                            }
                        }
                    }
                }
                finally
                {
                    fieldsWriter.Close();
                }
            }
            // If we are skipping the doc stores, that means there
            // are no deletions in any of these segments, so we
            // just sum numDocs() of each segment to get total docCount
            else
            {
                for (int i = 0; i < readers.Count; i++)
                {
                    docCount += ((IndexReader)readers[i]).NumDocs();
                }
            }

            return(docCount);
        }
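The bulk-copy branch above is the interesting part of this merge: when a segment's field numbering matches the merged FieldInfos, whole runs of live documents are copied from the source .fdt as raw bytes (RawDocs + AddRawDocuments) instead of being decoded and re-encoded field by field. A small sketch of the run-finding step it relies on; NextLiveRun is our name, not Lucene's:

// Sketch: length of the next contiguous run of non-deleted docs starting at
// 'start', capped at maxRun, so their stored fields can be copied in a
// single AddRawDocuments call.
private static int NextLiveRun(IndexReader reader, int start, int maxDoc, int maxRun)
{
    int numDocs = 0;
    int j = start;
    while (j < maxDoc && !reader.IsDeleted(j) && numDocs < maxRun)
    {
        j++;
        numDocs++;
    }
    return numDocs;
}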
 public StoredFieldsWriterPerThread(DocFieldProcessorPerThread docFieldProcessorPerThread, StoredFieldsWriter storedFieldsWriter)
 {
     this.storedFieldsWriter = storedFieldsWriter;
     this.docState           = docFieldProcessorPerThread.docState;
     localFieldsWriter       = new FieldsWriter((IndexOutput)null, (IndexOutput)null, storedFieldsWriter.fieldInfos);
 }
Example #26
0
			/// <summary>Clear the postings hash and return objects back to
			/// shared pool 
			/// </summary>
			public void  ResetPostings()
			{
				fieldGen = 0;
				maxPostingsVectors = 0;
				doFlushAfter = false;
				if (localFieldsWriter != null)
				{
					localFieldsWriter.Close();
					localFieldsWriter = null;
				}
				postingsPool.Reset();
				charPool.Reset();
				Enclosing_Instance.RecyclePostings(postingsFreeList, postingsFreeCount);
				postingsFreeCount = 0;
				for (int i = 0; i < numAllFieldData; i++)
				{
					FieldData fp = allFieldDataArray[i];
					fp.lastGen = - 1;
					if (fp.numPostings > 0)
						fp.ResetPostingArrays();
				}
			}
        internal override void closeDocStore(DocumentsWriter.FlushState state)
        {
            lock (this)
            {
                int inc = state.numDocsInStore - lastDocID;
                if (inc > 0)
                {
                    initFieldsWriter();
                    fill(state.numDocsInStore - docWriter.GetDocStoreOffset());
                }

                if (fieldsWriter != null)
                {
                    fieldsWriter.Close();
                    fieldsWriter = null;
                    lastDocID = 0;
                    System.Diagnostics.Debug.Assert(state.docStoreSegmentName != null);

                    string fdtFile = state.docStoreSegmentName + "." + IndexFileNames.FIELDS_EXTENSION;
                    string fdxFile = state.docStoreSegmentName + "." + IndexFileNames.FIELDS_INDEX_EXTENSION;

                    state.flushedFiles[fdtFile] = fdtFile;
                    state.flushedFiles[fdxFile] = fdxFile;

                    state.docWriter.RemoveOpenFile(fdtFile);
                    state.docWriter.RemoveOpenFile(fdxFile);

                    if (4 + state.numDocsInStore * 8 != state.directory.FileLength(fdxFile))
                        throw new System.SystemException("after flush: fdx size mismatch: " + state.numDocsInStore + " docs vs " + state.directory.FileLength(fdxFile) + " length in bytes of " + fdxFile);
                }
            }
        }
Example #28
0
		/// <summary>Closes the currently open doc stores and returns the doc
		/// store segment name.  This returns null if there are
		/// no buffered documents. 
		/// </summary>
		internal System.String CloseDocStore()
		{
			
			System.Diagnostics.Debug.Assert(AllThreadsIdle());
			
			System.Collections.IList flushedFiles = Files();
			
			if (infoStream != null)
				infoStream.WriteLine("\ncloseDocStore: " + flushedFiles.Count + " files to flush to segment " + docStoreSegment + " numDocs=" + numDocsInStore);
			
			if (flushedFiles.Count > 0)
			{
				files = null;
				
				if (tvx != null)
				{
					// At least one doc in this run had term vectors enabled
					System.Diagnostics.Debug.Assert(docStoreSegment != null);
					tvx.Close();
					tvf.Close();
					tvd.Close();
					tvx = null;
                    System.Diagnostics.Debug.Assert(4 + numDocsInStore * 8 == directory.FileLength(docStoreSegment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION),
                        "after flush: tvx size mismatch: " + numDocsInStore + " docs vs " + directory.FileLength(docStoreSegment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION) +
                        " length in bytes of " + docStoreSegment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION);
				}
				
				if (fieldsWriter != null)
				{
					System.Diagnostics.Debug.Assert(docStoreSegment != null);
					fieldsWriter.Close();
					fieldsWriter = null;
                    System.Diagnostics.Debug.Assert(numDocsInStore * 8 == directory.FileLength(docStoreSegment + "." + IndexFileNames.FIELDS_INDEX_EXTENSION),
                        "after flush: fdx size mismatch: " + numDocsInStore + " docs vs " + directory.FileLength(docStoreSegment + "." + IndexFileNames.FIELDS_INDEX_EXTENSION) +
                        " length in bytes of " + docStoreSegment + "." + IndexFileNames.FIELDS_INDEX_EXTENSION);
                }
				
				System.String s = docStoreSegment;
				docStoreSegment = null;
				docStoreOffset = 0;
				numDocsInStore = 0;
				return s;
			}
			else
			{
				return null;
			}
		}
Example #29
0
        /// <summary> </summary>
        /// <returns> The number of documents in all of the readers
        /// </returns>
        /// <throws>  IOException </throws>
        private int MergeFields()
        {
            fieldInfos = new FieldInfos(); // merge field names
            int docCount = 0;
            for (int i = 0; i < readers.Count; i++)
            {
                IndexReader reader = (IndexReader) readers[i];
                AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET), true, true, true);
                AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION), true, true, false);
                AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET), true, false, true);
                AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR), true, false, false);
                AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.INDEXED), false, false, false);
                fieldInfos.Add(reader.GetFieldNames(IndexReader.FieldOption.UNINDEXED), false);
            }
            fieldInfos.Write(directory, segment + ".fnm");

            FieldsWriter fieldsWriter = new FieldsWriter(directory, segment, fieldInfos);
            try
            {
                for (int i = 0; i < readers.Count; i++)
                {
                    IndexReader reader = (IndexReader) readers[i];
                    int maxDoc = reader.MaxDoc();
                    for (int j = 0; j < maxDoc; j++)
                        if (!reader.IsDeleted(j))
                        {
                            // skip deleted docs
                            fieldsWriter.AddDocument(reader.Document(j));
                            docCount++;
                        }
                }
            }
            finally
            {
                fieldsWriter.Close();
            }
            return docCount;
        }
 public StoredFieldsWriterPerThread(DocFieldProcessorPerThread docFieldProcessorPerThread, StoredFieldsWriter storedFieldsWriter)
 {
     this.storedFieldsWriter = storedFieldsWriter;
     this.docState = docFieldProcessorPerThread.docState;
     localFieldsWriter = new FieldsWriter((IndexOutput)null, (IndexOutput)null, storedFieldsWriter.fieldInfos);
 }
 public StoredFieldsWriterPerThread(DocumentsWriter.DocState docState, StoredFieldsWriter storedFieldsWriter)
 {
     this.storedFieldsWriter = storedFieldsWriter;
     this.docState           = docState;
     localFieldsWriter       = new FieldsWriter((IndexOutput)null, (IndexOutput)null, storedFieldsWriter.fieldInfos);
 }
Example #32
0
		/// <summary> </summary>
		/// <returns> The number of documents in all of the readers
		/// </returns>
		/// <throws>  IOException </throws>
		private int MergeFields()
		{
			fieldInfos = new FieldInfos(); // merge field names
			int docCount = 0;
			for (int i = 0; i < readers.Count; i++)
			{
				IndexReader reader = (IndexReader) readers[i];
				AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET), true, true, true);
				AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION), true, true, false);
				AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET), true, false, true);
				AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR), true, false, false);
				AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.INDEXED), false, false, false);
				fieldInfos.Add(reader.GetFieldNames(IndexReader.FieldOption.UNINDEXED), false);
			}
			fieldInfos.Write(directory, segment + ".fnm");
			
			FieldsWriter fieldsWriter = new FieldsWriter(directory, segment, fieldInfos);
			
			// for merging we don't want to compress/uncompress the data, so to tell the FieldsReader that we're
			// in  merge mode, we use this FieldSelector
			FieldSelector fieldSelectorMerge = new AnonymousClassFieldSelector(this);
			
			try
			{
				for (int i = 0; i < readers.Count; i++)
				{
					IndexReader reader = (IndexReader) readers[i];
					int maxDoc = reader.MaxDoc();
					for (int j = 0; j < maxDoc; j++)
						if (!reader.IsDeleted(j))
						{
							// skip deleted docs
							fieldsWriter.AddDocument(reader.Document(j, fieldSelectorMerge));
							docCount++;
						}
				}
			}
			finally
			{
				fieldsWriter.Close();
			}
			return docCount;
		}
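The fieldSelectorMerge used in these MergeFields variants exists so FieldsReader hands stored values back as-is; in particular, compressed fields are not decompressed and re-compressed just to be rewritten. A sketch of what such a selector looks like, assuming the FieldSelector/FieldSelectorResult API of this Lucene.NET generation and its LOAD_FOR_MERGE result; the class name is illustrative:

// Sketch only: every stored field is loaded, but flagged for merge so the
// reader keeps its raw (possibly compressed) bytes untouched.
internal class MergeFieldSelector : Lucene.Net.Documents.FieldSelector
{
    public Lucene.Net.Documents.FieldSelectorResult Accept(System.String fieldName)
    {
        return Lucene.Net.Documents.FieldSelectorResult.LOAD_FOR_MERGE;
    }
}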