Example #1
        public /*internal*/ Document.Document Doc(int n, FieldSelector fieldSelector)
        {
            SeekIndex(n);
            long position = indexStream.ReadLong();

            fieldsStream.Seek(position);

            var doc       = new Document.Document();
            int numFields = fieldsStream.ReadVInt();

            for (int i = 0; i < numFields; i++)
            {
                int                 fieldNumber = fieldsStream.ReadVInt();
                FieldInfo           fi          = fieldInfos.FieldInfo(fieldNumber);
                FieldSelectorResult acceptField = fieldSelector == null ? FieldSelectorResult.LOAD : fieldSelector.Accept(fi.name);

                byte bits = fieldsStream.ReadByte();
                System.Diagnostics.Debug.Assert(bits <= FieldsWriter.FIELD_IS_COMPRESSED + FieldsWriter.FIELD_IS_TOKENIZED + FieldsWriter.FIELD_IS_BINARY);

                bool compressed = (bits & FieldsWriter.FIELD_IS_COMPRESSED) != 0;
                System.Diagnostics.Debug.Assert(
                    (!compressed || (format < FieldsWriter.FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS)),
                    "compressed fields are only allowed in indexes of version <= 2.9");
                bool tokenize = (bits & FieldsWriter.FIELD_IS_TOKENIZED) != 0;
                bool binary   = (bits & FieldsWriter.FIELD_IS_BINARY) != 0;
                //TODO: Find an alternative approach here if this list continues to grow beyond the
                //list of 5 or 6 currently here.  See Lucene 762 for discussion
                if (acceptField.Equals(FieldSelectorResult.LOAD))
                {
                    AddField(doc, fi, binary, compressed, tokenize);
                }
                else if (acceptField.Equals(FieldSelectorResult.LOAD_AND_BREAK))
                {
                    AddField(doc, fi, binary, compressed, tokenize);
                    break;                     //Get out of this loop
                }
                else if (acceptField.Equals(FieldSelectorResult.LAZY_LOAD))
                {
                    AddFieldLazy(doc, fi, binary, compressed, tokenize);
                }
                else if (acceptField.Equals(FieldSelectorResult.SIZE))
                {
                    SkipField(binary, compressed, AddFieldSize(doc, fi, binary, compressed));
                }
                else if (acceptField.Equals(FieldSelectorResult.SIZE_AND_BREAK))
                {
                    AddFieldSize(doc, fi, binary, compressed);
                    break;
                }
                else
                {
                    SkipField(binary, compressed);
                }
            }

            return doc;
        }
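
The FieldSelector argument decides, per field, which branch of the loop above runs. As a minimal sketch (not part of the source; the class name and field names are illustrative assumptions), a custom selector could eagerly load one field, lazily load another, and skip everything else:

        // Hypothetical selector for illustration only: "title" is loaded eagerly,
        // "body" is deferred to AddFieldLazy, and all other fields fall through
        // to SkipField via NO_LOAD.
        class TitleThenLazyBodySelector : FieldSelector
        {
            public FieldSelectorResult Accept(System.String fieldName)
            {
                if ("title".Equals(fieldName))
                    return FieldSelectorResult.LOAD;       // handled by AddField above
                if ("body".Equals(fieldName))
                    return FieldSelectorResult.LAZY_LOAD;  // handled by AddFieldLazy above
                return FieldSelectorResult.NO_LOAD;        // handled by SkipField above
            }
        }

Callers typically reach this method through the public reader API, for example IndexReader.Document(n, fieldSelector), rather than constructing a FieldsReader directly.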
Example #2
        internal void AddDocument(Document.Document doc)
        {
            indexStream.WriteLong(fieldsStream.FilePointer);

            System.Collections.Generic.IList<IFieldable> fields = doc.GetFields();
            int storedCount = fields.Count(field => field.IsStored);

            fieldsStream.WriteVInt(storedCount);

            foreach (IFieldable field in fields)
            {
                if (field.IsStored)
                {
                    WriteField(fieldInfos.FieldInfo(field.Name), field);
                }
            }
        }
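
Only stored fields are counted and written: the index stream receives one long per document (the offset of that document's record in the fields stream), and the fields stream then receives a vInt count followed by each stored field. A small sketch of input a caller might pass (the field names, values, and the fieldsWriter variable are assumptions for illustration):

        // Hypothetical document mixing a stored and an unstored field; only
        // the stored "id" field would be counted and passed to WriteField.
        var doc = new Document.Document();
        doc.Add(new Field("id", "42", Field.Store.YES, Field.Index.NOT_ANALYZED));         // stored, written
        doc.Add(new Field("body", "analyzed text", Field.Store.NO, Field.Index.ANALYZED)); // not stored, skipped
        // fieldsWriter.AddDocument(doc);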
Example #3
        /// <summary>Produce _X.nrm if any document had a field whose norms
        /// were not disabled.
        /// </summary>
        public override void Flush(IDictionary<InvertedDocEndConsumerPerThread, ICollection<InvertedDocEndConsumerPerField>> threadsAndFields, SegmentWriteState state)
        {
            IDictionary<FieldInfo, IList<NormsWriterPerField>> byField = new HashMap<FieldInfo, IList<NormsWriterPerField>>();

            // Typically, each thread will have encountered the same
            // field.  So first we collate by field, i.e., all
            // per-thread field instances that correspond to the
            // same FieldInfo
            foreach (var entry in threadsAndFields)
            {
                ICollection<InvertedDocEndConsumerPerField> fields   = entry.Value;
                IEnumerator<InvertedDocEndConsumerPerField> fieldsIt = fields.GetEnumerator();
                var fieldsToRemove = new HashSet<NormsWriterPerField>();
                while (fieldsIt.MoveNext())
                {
                    NormsWriterPerField perField = (NormsWriterPerField)fieldsIt.Current;

                    if (perField.upto > 0)
                    {
                        // It has some norms
                        IList<NormsWriterPerField> l = byField[perField.fieldInfo];
                        if (l == null)
                        {
                            l = new List<NormsWriterPerField>();
                            byField[perField.fieldInfo] = l;
                        }
                        l.Add(perField);
                    }
                    else
                    {
                        // Remove this field since we haven't seen it
                        // since the previous flush
                        fieldsToRemove.Add(perField);
                    }
                }
                foreach (var field in fieldsToRemove)
                {
                    fields.Remove(field);
                }
            }

            System.String normsFileName = state.segmentName + "." + IndexFileNames.NORMS_EXTENSION;
            state.flushedFiles.Add(normsFileName);
            IndexOutput normsOut = state.directory.CreateOutput(normsFileName);

            try
            {
                normsOut.WriteBytes(SegmentMerger.NORMS_HEADER, 0, SegmentMerger.NORMS_HEADER.Length);

                int numField = fieldInfos.Size();

                int normCount = 0;

                for (int fieldNumber = 0; fieldNumber < numField; fieldNumber++)
                {
                    FieldInfo fieldInfo = fieldInfos.FieldInfo(fieldNumber);

                    IList<NormsWriterPerField> toMerge = byField[fieldInfo];
                    int upto = 0;
                    if (toMerge != null)
                    {
                        int numFields = toMerge.Count;

                        normCount++;

                        NormsWriterPerField[] fields = new NormsWriterPerField[numFields];
                        int[] uptos = new int[numFields];

                        for (int j = 0; j < numFields; j++)
                        {
                            fields[j] = toMerge[j];
                        }

                        int numLeft = numFields;

                        while (numLeft > 0)
                        {
                            System.Diagnostics.Debug.Assert(uptos[0] < fields[0].docIDs.Length, " uptos[0]=" + uptos[0] + " len=" + (fields[0].docIDs.Length));

                            int minLoc   = 0;
                            int minDocID = fields[0].docIDs[uptos[0]];

                            for (int j = 1; j < numLeft; j++)
                            {
                                int docID = fields[j].docIDs[uptos[j]];
                                if (docID < minDocID)
                                {
                                    minDocID = docID;
                                    minLoc   = j;
                                }
                            }

                            System.Diagnostics.Debug.Assert(minDocID < state.numDocs);

                            // Fill hole
                            for (; upto < minDocID; upto++)
                            {
                                normsOut.WriteByte(defaultNorm);
                            }

                            normsOut.WriteByte(fields[minLoc].norms[uptos[minLoc]]);
                            (uptos[minLoc])++;
                            upto++;

                            if (uptos[minLoc] == fields[minLoc].upto)
                            {
                                fields[minLoc].Reset();
                                if (minLoc != numLeft - 1)
                                {
                                    fields[minLoc] = fields[numLeft - 1];
                                    uptos[minLoc]  = uptos[numLeft - 1];
                                }
                                numLeft--;
                            }
                        }

                        // Fill final hole with defaultNorm
                        for (; upto < state.numDocs; upto++)
                        {
                            normsOut.WriteByte(defaultNorm);
                        }
                    }
                    else if (fieldInfo.isIndexed && !fieldInfo.omitNorms)
                    {
                        normCount++;
                        // Fill entire field with default norm:
                        for (; upto < state.numDocs; upto++)
                        {
                            normsOut.WriteByte(defaultNorm);
                        }
                    }

                    System.Diagnostics.Debug.Assert(4 + normCount * state.numDocs == normsOut.FilePointer, ".nrm file size mismatch: expected=" + (4 + normCount * state.numDocs) + " actual=" + normsOut.FilePointer);
                }
            }
            finally
            {
                normsOut.Close();
            }
        }
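
The assert near the end pins down the .nrm layout this flush produces: a header of SegmentMerger.NORMS_HEADER.Length bytes (4 in this format), then exactly one norm byte per document for each field that has norms, with defaultNorm filling any holes. A minimal sketch of that size arithmetic, using assumed counts:

        // Illustrative only; both counts are assumptions, not taken from the source.
        int numDocsInSegment = 1000;   // state.numDocs for the segment being flushed
        int fieldsWithNorms  = 3;      // normCount: indexed fields that do not omit norms
        long expectedNrmSize = SegmentMerger.NORMS_HEADER.Length             // 4-byte header
                             + (long)fieldsWithNorms * numDocsInSegment;     // one byte per doc per field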
Example #4
        /// <summary>Merge the stored fields of all readers into the merged segment.</summary>
        /// <returns> The number of documents in all of the readers
        /// </returns>
        /// <throws>  CorruptIndexException if the index is corrupt </throws>
        /// <throws>  IOException if there is a low-level IO error </throws>
        private int MergeFields()
        {
            if (!mergeDocStores)
            {
                // When we are not merging by doc stores, their field
                // name -> number mapping are the same.  So, we start
                // with the fieldInfos of the last segment in this
                // case, to keep that numbering.
                SegmentReader sr = (SegmentReader)readers[readers.Count - 1];
                fieldInfos = (FieldInfos)sr.core.fieldInfos.Clone();
            }
            else
            {
                fieldInfos = new FieldInfos();                 // merge field names
            }

            foreach (IndexReader reader in readers)
            {
                if (reader is SegmentReader)
                {
                    SegmentReader segmentReader       = (SegmentReader)reader;
                    FieldInfos    readerFieldInfos    = segmentReader.FieldInfos();
                    int           numReaderFieldInfos = readerFieldInfos.Size();
                    for (int j = 0; j < numReaderFieldInfos; j++)
                    {
                        FieldInfo fi = readerFieldInfos.FieldInfo(j);
                        fieldInfos.Add(fi.name, fi.isIndexed, fi.storeTermVector, fi.storePositionWithTermVector, fi.storeOffsetWithTermVector, !reader.HasNorms(fi.name), fi.storePayloads, fi.omitTermFreqAndPositions);
                    }
                }
                else
                {
                    AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET), true, true, true, false, false);
                    AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION), true, true, false, false, false);
                    AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET), true, false, true, false, false);
                    AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR), true, false, false, false, false);
                    AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.OMIT_TERM_FREQ_AND_POSITIONS), false, false, false, false, true);
                    AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.STORES_PAYLOADS), false, false, false, true, false);
                    AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.INDEXED), false, false, false, false, false);
                    fieldInfos.Add(reader.GetFieldNames(IndexReader.FieldOption.UNINDEXED), false);
                }
            }
            fieldInfos.Write(directory, segment + ".fnm");

            int docCount = 0;

            SetMatchingSegmentReaders();

            if (mergeDocStores)
            {
                // merge field values
                FieldsWriter fieldsWriter = new FieldsWriter(directory, segment, fieldInfos);

                try
                {
                    int idx = 0;
                    foreach (IndexReader reader in readers)
                    {
                        SegmentReader matchingSegmentReader = matchingSegmentReaders[idx++];
                        FieldsReader  matchingFieldsReader  = null;
                        if (matchingSegmentReader != null)
                        {
                            FieldsReader fieldsReader = matchingSegmentReader.GetFieldsReader();
                            if (fieldsReader != null && fieldsReader.CanReadRawDocs())
                            {
                                matchingFieldsReader = fieldsReader;
                            }
                        }
                        if (reader.HasDeletions)
                        {
                            docCount += CopyFieldsWithDeletions(fieldsWriter, reader, matchingFieldsReader);
                        }
                        else
                        {
                            docCount += CopyFieldsNoDeletions(fieldsWriter, reader, matchingFieldsReader);
                        }
                    }
                }
                finally
                {
                    fieldsWriter.Dispose();
                }

                System.String fileName      = segment + "." + IndexFileNames.FIELDS_INDEX_EXTENSION;
                long          fdxFileLength = directory.FileLength(fileName);

                if (4 + ((long)docCount) * 8 != fdxFileLength)
                {
                    // This is most likely a bug in Sun JRE 1.6.0_04/_05;
                    // we detect that the bug has struck, here, and
                    // throw an exception to prevent the corruption from
                    // entering the index.  See LUCENE-1282 for
                    // details.
                    throw new System.SystemException("mergeFields produced an invalid result: docCount is " + docCount + " but fdx file size is " + fdxFileLength + " file=" + fileName + " file exists?=" + directory.FileExists(fileName) + "; now aborting this merge to prevent index corruption");
                }
            }
            // If we are skipping the doc stores, that means there
            // are no deletions in any of these segments, so we
            // just sum numDocs() of each segment to get total docCount
            else
            {
                foreach (IndexReader reader in readers)
                {
                    docCount += reader.NumDocs();
                }
            }

            return docCount;
        }
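
The sanity check on the .fdx file relies on the stored-fields index layout: a 4-byte header followed by one 8-byte pointer per merged document, so the expected length is 4 + docCount * 8. A small sketch of the same arithmetic with an assumed document count:

        // Illustrative only; the document count is an assumption.
        long mergedDocCount    = 250000;
        long expectedFdxLength = 4 + mergedDocCount * 8;   // header + one long per document
        // A length mismatch with directory.FileLength(fileName) indicates corruption
        // and aborts the merge, exactly as the check above does.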