        // maxAllowed = the "highest" IndexOptions we can index with, but we
        // will still randomly index some fields at a lower IndexOptions
        private FieldsProducer BuildIndex(Directory dir, FieldInfo.IndexOptions maxAllowed, bool allowPayloads, bool alwaysTestMax)
        {
            Codec codec = Codec;
            SegmentInfo segmentInfo = new SegmentInfo(dir, Constants.LUCENE_MAIN_VERSION, "_0", MaxDoc, false, codec, null);

            int maxIndexOption = Enum.GetValues(typeof(FieldInfo.IndexOptions)).Cast<FieldInfo.IndexOptions>().ToList().IndexOf(maxAllowed);
            if (VERBOSE)
            {
                Console.WriteLine("\nTEST: now build index");
            }

            int maxIndexOptionNoOffsets = Enum.GetValues(typeof(FieldInfo.IndexOptions)).Cast<FieldInfo.IndexOptions>().ToList().IndexOf(FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);

            // TODO use allowPayloads

            var newFieldInfoArray = new FieldInfo[Fields.Count];
            for (int fieldUpto = 0; fieldUpto < Fields.Count; fieldUpto++)
            {
                FieldInfo oldFieldInfo = FieldInfos.FieldInfo(fieldUpto);

                string pf = TestUtil.GetPostingsFormat(codec, oldFieldInfo.Name);
                int fieldMaxIndexOption;
                if (DoesntSupportOffsets.Contains(pf))
                {
                    fieldMaxIndexOption = Math.Min(maxIndexOptionNoOffsets, maxIndexOption);
                }
                else
                {
                    fieldMaxIndexOption = maxIndexOption;
                }

                // Randomly pick the IndexOptions to index this
                // field with:
                FieldInfo.IndexOptions indexOptions = Enum.GetValues(typeof(FieldInfo.IndexOptions)).Cast<FieldInfo.IndexOptions>().ToArray()[alwaysTestMax ? fieldMaxIndexOption : Random().Next(1 + fieldMaxIndexOption)];
                bool doPayloads = indexOptions.CompareTo(FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0 && allowPayloads;

                newFieldInfoArray[fieldUpto] = new FieldInfo(oldFieldInfo.Name, true, fieldUpto, false, false, doPayloads, indexOptions, null, DocValuesType.NUMERIC, null);
            }

            FieldInfos newFieldInfos = new FieldInfos(newFieldInfoArray);

            // Estimate that flushed segment size will be 25% of
            // what we use in RAM:
            long bytes = TotalPostings * 8 + TotalPayloadBytes;
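            // For example (hypothetical numbers): 1,000,000 postings * 8 bytes
            // + 50,000 payload bytes ~= 8 MB of RAM, which the comment above
            // projects down to roughly a 2 MB flushed segment.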

            SegmentWriteState writeState = new SegmentWriteState(null, dir, segmentInfo, newFieldInfos, 32, null, new IOContext(new FlushInfo(MaxDoc, bytes)));
            FieldsConsumer fieldsConsumer = codec.PostingsFormat().FieldsConsumer(writeState);

            foreach (KeyValuePair<string, SortedDictionary<BytesRef, long>> fieldEnt in Fields)
            {
                string field = fieldEnt.Key;
                IDictionary<BytesRef, long> terms = fieldEnt.Value;

                FieldInfo fieldInfo = newFieldInfos.FieldInfo(field);

                FieldInfo.IndexOptions? indexOptions = fieldInfo.FieldIndexOptions;

                if (VERBOSE)
                {
                    Console.WriteLine("field=" + field + " indexOtions=" + indexOptions);
                }

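                // NOTE: these >= comparisons rely on FieldInfo.IndexOptions
                // being declared in increasing order of indexed detail
                // (DOCS_ONLY, DOCS_AND_FREQS, DOCS_AND_FREQS_AND_POSITIONS,
                // DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS), so each flag is
                // true exactly when the chosen options include that data.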
                bool doFreq = indexOptions >= FieldInfo.IndexOptions.DOCS_AND_FREQS;
                bool doPos = indexOptions >= FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
                bool doPayloads = indexOptions >= FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS && allowPayloads;
                bool doOffsets = indexOptions >= FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;

                TermsConsumer termsConsumer = fieldsConsumer.AddField(fieldInfo);
                long sumTotalTF = 0;
                long sumDF = 0;
                FixedBitSet seenDocs = new FixedBitSet(MaxDoc);
                foreach (KeyValuePair<BytesRef, long> termEnt in terms)
                {
                    BytesRef term = termEnt.Key;
                    SeedPostings postings = GetSeedPostings(term.Utf8ToString(), termEnt.Value, false, maxAllowed);
                    if (VERBOSE)
                    {
                        Console.WriteLine("  term=" + field + ":" + term.Utf8ToString() + " docFreq=" + postings.DocFreq + " seed=" + termEnt.Value);
                    }

                    PostingsConsumer postingsConsumer = termsConsumer.StartTerm(term);
                    long totalTF = 0;
                    int docID = 0;
                    while ((docID = postings.NextDoc()) != DocsEnum.NO_MORE_DOCS)
                    {
                        int freq = postings.Freq();
                        if (VERBOSE)
                        {
                            Console.WriteLine("    " + postings.Upto + ": docID=" + docID + " freq=" + postings.Freq_Renamed);
                        }
                        postingsConsumer.StartDoc(docID, doFreq ? freq : -1);
                        seenDocs.Set(docID);
                        if (doPos)
                        {
                            totalTF += freq;
                            for (int posUpto = 0; posUpto < freq; posUpto++)
                            {
                                int pos = postings.NextPosition();
                                BytesRef payload = postings.Payload;

                                if (VERBOSE)
                                {
                                    if (doPayloads)
                                    {
                                        Console.WriteLine("      pos=" + pos + " payload=" + (payload == null ? "null" : payload.Length + " bytes"));
                                    }
                                    else
                                    {
                                        Console.WriteLine("      pos=" + pos);
                                    }
                                }
                                postingsConsumer.AddPosition(pos, doPayloads ? payload : null, doOffsets ? postings.StartOffset() : -1, doOffsets ? postings.EndOffset() : -1);
                            }
                        }
                        else if (doFreq)
                        {
                            totalTF += freq;
                        }
                        else
                        {
                            totalTF++;
                        }
                        postingsConsumer.FinishDoc();
                    }
                    termsConsumer.FinishTerm(term, new TermStats(postings.DocFreq, doFreq ? totalTF : -1));
                    sumTotalTF += totalTF;
                    sumDF += postings.DocFreq;
                }

                termsConsumer.Finish(doFreq ? sumTotalTF : -1, sumDF, seenDocs.Cardinality());
            }

            fieldsConsumer.Dispose();

            if (VERBOSE)
            {
                Console.WriteLine("TEST: after indexing: files=");
                foreach (string file in dir.ListAll())
                {
                    Console.WriteLine("  " + file + ": " + dir.FileLength(file) + " bytes");
                }
            }

            CurrentFieldInfos = newFieldInfos;

            SegmentReadState readState = new SegmentReadState(dir, segmentInfo, newFieldInfos, IOContext.READ, 1);

            return codec.PostingsFormat().FieldsProducer(readState);
        }
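
        // A minimal sketch (not part of the original test) of how BuildIndex
        // might be driven; NewDirectory() comes from the LuceneTestCase
        // fixture, and ExampleBuildAndRelease is a hypothetical helper, not
        // framework API.
        private void ExampleBuildAndRelease()
        {
            Directory dir = NewDirectory();
            FieldsProducer fieldsProducer = BuildIndex(dir, FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS, allowPayloads: true, alwaysTestMax: true);
            // ... verify the postings exposed by fieldsProducer here ...
            fieldsProducer.Dispose();
            dir.Dispose();
        }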
 /// <summary>
 /// Returns a new Directory instance, using the specified random,
 /// with contents copied from the provided directory. See
 /// <seealso cref="NewDirectory()"/> for more information.
 /// </summary>
 public static BaseDirectoryWrapper NewDirectory(Random r, Directory d)
 {
     Directory impl = NewDirectoryImpl(r, TEST_DIRECTORY);
     foreach (string file in d.ListAll())
     {
         d.Copy(impl, file, file, NewIOContext(r));
     }
     return WrapDirectory(r, impl, Rarely(r));
 }
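
 // A minimal sketch (hypothetical, not from the original source) showing the
 // intent of this overload: clone an existing directory so a test can mutate
 // the copy while leaving the original untouched.
 public static void ExampleCloneForTest(Random random, Directory original)
 {
     BaseDirectoryWrapper copy = NewDirectory(random, original);
     // ... open an IndexWriter or IndexReader against "copy" here ...
     copy.Dispose();
 }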
        /// <summary>
        /// Initialize the deleter: find all previous commits in
        /// the Directory, incref the files they reference, call
        /// the policy to let it delete commits.  This will remove
        /// any files not referenced by any of the commits. </summary>
        /// <exception cref="IOException"> if there is a low-level IO error </exception>
        public IndexFileDeleter(Directory directory, IndexDeletionPolicy policy, SegmentInfos segmentInfos, InfoStream infoStream, IndexWriter writer, bool initialIndexExists)
        {
            this.InfoStream = infoStream;
            this.Writer = writer;

            string currentSegmentsFile = segmentInfos.SegmentsFileName;

            if (infoStream.IsEnabled("IFD"))
            {
                infoStream.Message("IFD", "init: current segments file is \"" + currentSegmentsFile + "\"; deletionPolicy=" + policy);
            }

            this.Policy = policy;
            this.Directory = directory;

            // First pass: walk the files and initialize our ref
            // counts:
            long currentGen = segmentInfos.Generation;

            CommitPoint currentCommitPoint = null;
            string[] files = null;
            try
            {
                files = directory.ListAll();
            }
            catch (NoSuchDirectoryException)
            {
                // it means the directory is empty, so ignore it.
                files = new string[0];
            }

            if (currentSegmentsFile != null)
            {
                Regex r = IndexFileNames.CODEC_FILE_PATTERN;
                foreach (string fileName in files)
                {
                    if (!fileName.EndsWith("write.lock") && !fileName.Equals(IndexFileNames.SEGMENTS_GEN)
                        && (r.IsMatch(fileName) || fileName.StartsWith(IndexFileNames.SEGMENTS)))
                    {
                        // Add this file to refCounts with initial count 0:
                        GetRefCount(fileName);

                        if (fileName.StartsWith(IndexFileNames.SEGMENTS))
                        {
                            // This is a commit (segments or segments_N), and
                            // it's valid (<= the max gen).  Load it, then
                            // incref all files it refers to:
                            if (infoStream.IsEnabled("IFD"))
                            {
                                infoStream.Message("IFD", "init: load commit \"" + fileName + "\"");
                            }
                            SegmentInfos sis = new SegmentInfos();
                            try
                            {
                                sis.Read(directory, fileName);
                            }
                            catch (FileNotFoundException)
                            {
                                // LUCENE-948: on NFS (and maybe others), if
                                // you have writers switching back and forth
                                // between machines, it's very likely that the
                                // dir listing will be stale and will claim a
                                // file segments_X exists when in fact it
                                // doesn't.  So, we catch this and handle it
                                // as if the file does not exist
                                if (infoStream.IsEnabled("IFD"))
                                {
                                    infoStream.Message("IFD", "init: hit FileNotFoundException when loading commit \"" + fileName + "\"; skipping this commit point");
                                }
                                sis = null;
                            }
                            catch (IOException)
                            {
                                if (SegmentInfos.GenerationFromSegmentsFileName(fileName) <= currentGen && directory.FileLength(fileName) > 0)
                                {
                                    // rethrow; a commit at or below the current generation must be readable
                                    throw;
                                }
                                else
                                {
                                    // Most likely we are opening an index that
                                    // has an aborted "future" commit, so suppress
                                    // exc in this case
                                    sis = null;
                                }
                            }
                            if (sis != null)
                            {
                                CommitPoint commitPoint = new CommitPoint(CommitsToDelete, directory, sis);
                                if (sis.Generation == segmentInfos.Generation)
                                {
                                    currentCommitPoint = commitPoint;
                                }
                                Commits.Add(commitPoint);
                                IncRef(sis, true);

                                if (LastSegmentInfos_Renamed == null || sis.Generation > LastSegmentInfos_Renamed.Generation)
                                {
                                    LastSegmentInfos_Renamed = sis;
                                }
                            }
                        }
                    }
                }
            }

            if (currentCommitPoint == null && currentSegmentsFile != null && initialIndexExists)
            {
                // We did not in fact see the segments_N file
                // corresponding to the segmentInfos that was passed
                // in.  Yet, it must exist, because our caller holds
                // the write lock.  This can happen when the directory
                // listing was stale (eg when index accessed via NFS
                // client with stale directory listing cache).  So we
                // try now to explicitly open this commit point:
                SegmentInfos sis = new SegmentInfos();
                try
                {
                    sis.Read(directory, currentSegmentsFile);
                }
                catch (IOException)
                {
                    throw new CorruptIndexException("failed to locate current segments_N file \"" + currentSegmentsFile + "\"");
                }
                if (infoStream.IsEnabled("IFD"))
                {
                    infoStream.Message("IFD", "forced open of current segments file " + segmentInfos.SegmentsFileName);
                }
                currentCommitPoint = new CommitPoint(CommitsToDelete, directory, sis);
                Commits.Add(currentCommitPoint);
                IncRef(sis, true);
            }

            // We keep commits list in sorted order (oldest to newest):
            CollectionUtil.TimSort(Commits);

            // Now delete anything with ref count at 0.  These are
            // presumably abandoned files, e.g. due to a crash of
            // IndexWriter.
            foreach (KeyValuePair<string, RefCount> entry in RefCounts)
            {
                RefCount rc = entry.Value;
                string fileName = entry.Key;
                if (0 == rc.Count)
                {
                    if (infoStream.IsEnabled("IFD"))
                    {
                        infoStream.Message("IFD", "init: removing unreferenced file \"" + fileName + "\"");
                    }
                    DeleteFile(fileName);
                }
            }

            // Finally, give policy a chance to remove things on
            // startup:
            Policy.OnInit(Commits);

            // Always protect the incoming segmentInfos since
            // sometimes it may not be the most recent commit
            Checkpoint(segmentInfos, false);

            StartingCommitDeleted = currentCommitPoint == null ? false : currentCommitPoint.Deleted;

            DeleteCommits();
        }
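
        // A minimal sketch (illustrative only; the IncRef/DecRef shapes are
        // assumed from this class's RefCount helper) of the invariant the
        // constructor establishes: a file is deletable exactly when no commit
        // references it, i.e. its ref count has fallen back to zero.
        private void ExampleRefCountLifecycle(string fileName)
        {
            RefCount rc = GetRefCount(fileName); // creates the entry at count 0
            rc.IncRef();                         // a live commit references the file
            if (rc.DecRef() == 0)                // last reference released
            {
                DeleteFile(fileName);            // now safe to remove
            }
        }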