Example 1
        internal virtual System.Collections.ArrayList Files()
        {
            System.Collections.ArrayList files = System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(16));

            for (int i = 0; i < IndexFileNames.INDEX_EXTENSIONS.Length; i++)
            {
                System.String name = segment + "." + IndexFileNames.INDEX_EXTENSIONS[i];
                if (Directory().FileExists(name))
                {
                    files.Add(name);
                }
            }

            for (int i = 0; i < fieldInfos.Size(); i++)
            {
                FieldInfo fi = fieldInfos.FieldInfo(i);
                if (fi.isIndexed && !fi.omitNorms)
                {
                    System.String name;
                    if (cfsReader == null)
                    {
                        name = segment + ".f" + i;
                    }
                    else
                    {
                        name = segment + ".s" + i;
                    }
                    if (Directory().FileExists(name))
                    {
                        files.Add(name);
                    }
                }
            }
            return files;
        }
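Nearly every snippet on this page turns on the same idiom: FieldInfos.Size() bounds a loop and FieldInfo(i) retrieves each entry by field number. A minimal sketch of the pattern, assuming a populated fieldInfos instance:

        // Sketch of the idiom used throughout these examples.
        for (int i = 0; i < fieldInfos.Size(); i++)
        {
            FieldInfo fi = fieldInfos.FieldInfo(i);
            if (fi.isIndexed && !fi.omitNorms)
            {
                // Field number i carries norms; its per-field norms file is
                // named segment + ".f" + i (or ".s" + i when separate norms
                // have been written, as in the example above).
            }
        }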
Example 2
        public /*internal*/ void  AddDocument(System.String segment, Document doc)
        {
            // write field names
            fieldInfos = new FieldInfos();
            fieldInfos.Add(doc);
            fieldInfos.Write(directory, segment + ".fnm");

            // write field values
            FieldsWriter fieldsWriter = new FieldsWriter(directory, segment, fieldInfos);

            try
            {
                fieldsWriter.AddDocument(doc);
            }
            finally
            {
                fieldsWriter.Close();
            }

            // invert doc into postingTable
            postingTable.Clear();                        // clear postingTable
            fieldLengths   = new int[fieldInfos.Size()]; // init fieldLengths
            fieldPositions = new int[fieldInfos.Size()]; // init fieldPositions
            fieldOffsets   = new int[fieldInfos.Size()]; // init fieldOffsets

            fieldBoosts = new float[fieldInfos.Size()];  // init fieldBoosts
            float boost = doc.GetBoost();

            for (int i = 0; i < fieldBoosts.Length; i++)
            {
                fieldBoosts[i] = boost;
            }

            InvertDocument(doc);

            // sort postingTable into an array
            Posting[] postings = SortPostingTable();

            /*
             * for (int i = 0; i < postings.length; i++) {
             * Posting posting = postings[i];
             * System.out.print(posting.term);
             * System.out.print(" freq=" + posting.freq);
             * System.out.print(" pos=");
             * System.out.print(posting.positions[0]);
             * for (int j = 1; j < posting.freq; j++)
             * System.out.print("," + posting.positions[j]);
             * System.out.println("");
             * }
             */

            // write postings
            WritePostings(postings, segment);

            // write norms of indexed fields
            WriteNorms(segment);
        }
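The parallel arrays above (fieldLengths, fieldPositions, fieldOffsets, fieldBoosts) are all sized with fieldInfos.Size() because each slot is addressed by field number. A hedged sketch of the lookup, assuming FieldInfos.FieldNumber(string) from the same era of the API and a hypothetical field name:

        // Hypothetical: find a field's number, then touch its slot.
        int n = fieldInfos.FieldNumber("contents"); // "contents" is a made-up name
        fieldLengths[n]++;                          // one more token seen for this field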
Example 3
        public virtual void  Test()
        {
            //Positive test of FieldInfos
            Assert.IsTrue(testDoc != null);
            FieldInfos fieldInfos = new FieldInfos();

            fieldInfos.Add(testDoc);
            //Since the complement is stored as well in the fields map
            Assert.IsTrue(fieldInfos.Size() == DocHelper.all.Count);             //this is all b/c we are using the no-arg constructor
            RAMDirectory dir = new RAMDirectory();

            System.String name   = "testFile";
            IndexOutput   output = dir.CreateOutput(name, null);

            Assert.IsTrue(output != null);
            //Use a RAMOutputStream

            try
            {
                fieldInfos.Write(output);
                output.Close();
                Assert.IsTrue(output.Length > 0);
                FieldInfos readIn = new FieldInfos(dir, name, null);
                Assert.IsTrue(fieldInfos.Size() == readIn.Size());
                FieldInfo info = readIn.FieldInfo("textField1");
                Assert.IsTrue(info != null);
                Assert.IsTrue(info.storeTermVector_ForNUnit == false);
                Assert.IsTrue(info.omitNorms_ForNUnit == false);

                info = readIn.FieldInfo("textField2");
                Assert.IsTrue(info != null);
                Assert.IsTrue(info.storeTermVector_ForNUnit == true);
                Assert.IsTrue(info.omitNorms_ForNUnit == false);

                info = readIn.FieldInfo("textField3");
                Assert.IsTrue(info != null);
                Assert.IsTrue(info.storeTermVector_ForNUnit == false);
                Assert.IsTrue(info.omitNorms_ForNUnit == true);

                info = readIn.FieldInfo("omitNorms");
                Assert.IsTrue(info != null);
                Assert.IsTrue(info.storeTermVector_ForNUnit == false);
                Assert.IsTrue(info.omitNorms_ForNUnit == true);

                dir.Close();
            }
            catch (System.IO.IOException)
            {
                Assert.IsTrue(false);
            }
        }
Example 4
        protected internal override void  DoCommit()
        {
            if (deletedDocsDirty)
            {
                // re-write deleted
                System.String oldDelFileName = si.GetDelFileName();
                if (oldDelFileName != null)
                {
                    // Mark this file for deletion.  Note that we don't
                    // actually try to delete it until the new segments file is
                    // successfully written:
                    deleter.AddPendingFile(oldDelFileName);
                }

                si.AdvanceDelGen();

                // We can write directly to the actual name (vs to a
                // .tmp & renaming it) because the file is not live
                // until segments file is written:
                deletedDocs.Write(Directory(), si.GetDelFileName());
            }
            if (undeleteAll && si.HasDeletions())
            {
                System.String oldDelFileName = si.GetDelFileName();
                if (oldDelFileName != null)
                {
                    // Mark this file for deletion.  Note that we don't
                    // actually try to delete it until the new segments file is
                    // successfully written:
                    deleter.AddPendingFile(oldDelFileName);
                }
                si.ClearDelGen();
            }
            if (normsDirty)
            {
                // re-write norms
                si.SetNumFields(fieldInfos.Size());
                System.Collections.IEnumerator values = norms.Values.GetEnumerator();
                while (values.MoveNext())
                {
                    Norm norm = (Norm)values.Current;
                    if (norm.dirty)
                    {
                        norm.ReWrite(si);
                    }
                }
            }
            deletedDocsDirty = false;
            normsDirty       = false;
            undeleteAll      = false;
        }
Example 5
		public virtual void  Test()
		{
			//Positive test of FieldInfos
			Assert.IsTrue(testDoc != null);
			FieldInfos fieldInfos = new FieldInfos();
			fieldInfos.Add(testDoc);
			//Since the complement is stored as well in the fields map
			Assert.IsTrue(fieldInfos.Size() == DocHelper.all.Count); //this is all b/c we are using the no-arg constructor
			RAMDirectory dir = new RAMDirectory();
			System.String name = "testFile";
			IndexOutput output = dir.CreateOutput(name);
			Assert.IsTrue(output != null);
			//Use a RAMOutputStream
			
			try
			{
				fieldInfos.Write(output);
				output.Close();
				Assert.IsTrue(output.Length() > 0);
				FieldInfos readIn = new FieldInfos(dir, name);
				Assert.IsTrue(fieldInfos.Size() == readIn.Size());
				FieldInfo info = readIn.FieldInfo("textField1");
				Assert.IsTrue(info != null);
				Assert.IsTrue(info.storeTermVector_ForNUnit == false);
				Assert.IsTrue(info.omitNorms_ForNUnit == false);
				
				info = readIn.FieldInfo("textField2");
				Assert.IsTrue(info != null);
				Assert.IsTrue(info.storeTermVector_ForNUnit == true);
				Assert.IsTrue(info.omitNorms_ForNUnit == false);
				
				info = readIn.FieldInfo("textField3");
				Assert.IsTrue(info != null);
				Assert.IsTrue(info.storeTermVector_ForNUnit == false);
				Assert.IsTrue(info.omitNorms_ForNUnit == true);
				
				info = readIn.FieldInfo("omitNorms");
				Assert.IsTrue(info != null);
				Assert.IsTrue(info.storeTermVector_ForNUnit == false);
				Assert.IsTrue(info.omitNorms_ForNUnit == true);
				
				dir.Close();
			}
			catch (System.IO.IOException)
			{
				Assert.IsTrue(false);
			}
		}
Example 6
        public virtual void Test()
        {
            string     name       = "testFile";
            Directory  dir        = NewDirectory();
            FieldInfos fieldInfos = CreateAndWriteFieldInfos(dir, name);

            FieldInfos readIn = ReadFieldInfos(dir, name);

            Assert.IsTrue(fieldInfos.Size() == readIn.Size());
            FieldInfo info = readIn.FieldInfo("textField1");

            Assert.IsTrue(info != null);
            Assert.IsTrue(info.HasVectors() == false);
            Assert.IsTrue(info.OmitsNorms() == false);

            info = readIn.FieldInfo("textField2");
            Assert.IsTrue(info != null);
            Assert.IsTrue(info.OmitsNorms() == false);

            info = readIn.FieldInfo("textField3");
            Assert.IsTrue(info != null);
            Assert.IsTrue(info.HasVectors() == false);
            Assert.IsTrue(info.OmitsNorms() == true);

            info = readIn.FieldInfo("omitNorms");
            Assert.IsTrue(info != null);
            Assert.IsTrue(info.HasVectors() == false);
            Assert.IsTrue(info.OmitsNorms() == true);

            dir.Dispose();
        }
Example 7
        public virtual FieldInfos CreateAndWriteFieldInfos(Directory dir, string filename)
        {
            //Positive test of FieldInfos
            Assert.IsTrue(TestDoc != null);
            FieldInfos.Builder builder = new FieldInfos.Builder();
            foreach (IndexableField field in TestDoc)
            {
                builder.AddOrUpdate(field.Name(), field.FieldType());
            }
            FieldInfos fieldInfos = builder.Finish();

            //Since the complement is stored as well in the fields map
            Assert.IsTrue(fieldInfos.Size() == DocHelper.All.Count); //this is all b/c we are using the no-arg constructor

            IndexOutput output = dir.CreateOutput(filename, NewIOContext(Random()));

            Assert.IsTrue(output != null);
            //Use a RAMOutputStream

            FieldInfosWriter writer = Codec.Default.FieldInfosFormat().FieldInfosWriter;

            writer.Write(dir, filename, "", fieldInfos, IOContext.DEFAULT);
            output.Dispose();
            return fieldInfos;
        }
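Distilled from the example above: the 4.x API no longer mutates a FieldInfos directly; entries are accumulated through FieldInfos.Builder and frozen with Finish(). A minimal sketch reusing the example's own names:

        FieldInfos.Builder builder = new FieldInfos.Builder();
        foreach (IndexableField field in TestDoc)
        {
            builder.AddOrUpdate(field.Name(), field.FieldType());
        }
        FieldInfos frozen = builder.Finish();
        // The frozen snapshot answers the same read API used elsewhere on
        // this page, e.g. frozen.Size() and frozen.FieldInfo("textField1").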
Example 8
        private void SetMatchingSegmentReaders()
        {
            // if the i'th reader is a SegmentReader and has
            // identical fieldName->number mapping, then this
            // array will be non-null at position i:
            matchingSegmentReaders = new SegmentReader[readers.Count];

            // if this reader is a SegmentReader, and all of its
            // fieldName->number mappings match the "merged"
            // FieldInfos, then we can do a bulk copy of the
            // stored fields
            for (int i = 0; i < readers.Count; i++)
            {
                IndexReader reader = (IndexReader)readers[i];
                if (reader is SegmentReader)
                {
                    SegmentReader segmentReader     = (SegmentReader)reader;
                    bool          same              = true;
                    FieldInfos    segmentFieldInfos = segmentReader.GetFieldInfos();
                    for (int j = 0; same && j < segmentFieldInfos.Size(); j++)
                    {
                        same = fieldInfos.FieldName(j).Equals(segmentFieldInfos.FieldName(j));
                    }
                    if (same)
                    {
                        matchingSegmentReaders[i] = segmentReader;
                    }
                }
            }

            // used for bulk-reading raw bytes for stored fields
            rawDocLengths  = new int[MAX_RAW_MERGE_DOCS];
            rawDocLengths2 = new int[MAX_RAW_MERGE_DOCS];
        }
Example 9
 private void AssertReadOnly(FieldInfos readOnly, FieldInfos modifiable)
 {
     Assert.AreEqual(modifiable.Size(), readOnly.Size());
     // assert we can iterate
     foreach (FieldInfo fi in readOnly)
     {
         Assert.AreEqual(fi.Name, modifiable.FieldInfo(fi.Number).Name);
     }
 }
Example 10
        public System.Collections.ArrayList CreateCompoundFile(System.String fileName)
        {
            CompoundFileWriter cfsWriter = new CompoundFileWriter(directory, fileName);

            System.Collections.ArrayList files = System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(IndexFileNames.COMPOUND_EXTENSIONS.Length + 1));

            // Basic files
            for (int i = 0; i < IndexFileNames.COMPOUND_EXTENSIONS.Length; i++)
            {
                files.Add(segment + "." + IndexFileNames.COMPOUND_EXTENSIONS[i]);
            }

            // Fieldable norm files
            for (int i = 0; i < fieldInfos.Size(); i++)
            {
                FieldInfo fi = fieldInfos.FieldInfo(i);
                if (fi.isIndexed && !fi.omitNorms)
                {
                    files.Add(segment + "." + IndexFileNames.NORMS_EXTENSION);
                    break;
                }
            }

            // Vector files
            if (fieldInfos.HasVectors())
            {
                for (int i = 0; i < IndexFileNames.VECTOR_EXTENSIONS.Length; i++)
                {
                    files.Add(segment + "." + IndexFileNames.VECTOR_EXTENSIONS[i]);
                }
            }

            // Now merge all added files
            System.Collections.IEnumerator it = files.GetEnumerator();
            while (it.MoveNext())
            {
                cfsWriter.AddFile((System.String)it.Current);
            }

            // Perform the merge
            cfsWriter.Close();

            return files;
        }
Example 11
        public virtual void TestFieldNames()
        {
            Directory            dir1       = GetDir1(Random());
            Directory            dir2       = GetDir2(Random());
            ParallelAtomicReader pr         = new ParallelAtomicReader(SlowCompositeReaderWrapper.Wrap(DirectoryReader.Open(dir1)), SlowCompositeReaderWrapper.Wrap(DirectoryReader.Open(dir2)));
            FieldInfos           fieldInfos = pr.FieldInfos;

            Assert.AreEqual(4, fieldInfos.Size());
            Assert.IsNotNull(fieldInfos.FieldInfo("f1"));
            Assert.IsNotNull(fieldInfos.FieldInfo("f2"));
            Assert.IsNotNull(fieldInfos.FieldInfo("f3"));
            Assert.IsNotNull(fieldInfos.FieldInfo("f4"));
            pr.Dispose();
            dir1.Dispose();
            dir2.Dispose();
        }
Example 12
 private void  WriteNorms(System.String segment)
 {
     for (int n = 0; n < fieldInfos.Size(); n++)
     {
         FieldInfo fi = fieldInfos.FieldInfo(n);
         if (fi.isIndexed && !fi.omitNorms)
         {
             float       norm  = fieldBoosts[n] * similarity.LengthNorm(fi.name, fieldLengths[n]);
             IndexOutput norms = directory.CreateOutput(segment + ".f" + n);
             try
             {
                 norms.WriteByte(Similarity.EncodeNorm(norm));
             }
             finally
             {
                 norms.Close();
             }
         }
     }
 }
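Each norms file above holds one byte per document. As a hedged aside, Similarity.EncodeNorm packs the float norm into that single byte and DecodeNorm reverses it with precision loss, so norms round-trip only approximately:

     byte b = Similarity.EncodeNorm(0.5f);  // lossy 8-bit encoding
     float back = Similarity.DecodeNorm(b); // close to, but not exactly, 0.5f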
Example 13
        public virtual void TestExactFileNames()
        {
            System.String outputDir = "lucene.backwardscompat0.index";
            RmDir(outputDir);

            try
            {
                Directory dir = FSDirectory.Open(new System.IO.DirectoryInfo(FullDir(outputDir)));

                IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true,
                                                     IndexWriter.MaxFieldLength.UNLIMITED);
                writer.SetRAMBufferSizeMB(16.0);
                for (int i = 0; i < 35; i++)
                {
                    AddDoc(writer, i);
                }
                Assert.AreEqual(35, writer.MaxDoc(), "wrong doc count");
                writer.Close();

                // Delete one doc so we get a .del file:
                IndexReader reader     = IndexReader.Open(dir, false);
                Term        searchTerm = new Term("id", "7");
                int         delCount   = reader.DeleteDocuments(searchTerm);
                Assert.AreEqual(1, delCount, "didn't delete the right number of documents");

                // Set one norm so we get a .s0 file:
                reader.SetNorm(21, "content", (float)1.5);
                reader.Close();

                // The numbering of fields can vary depending on which
                // JRE is in use.  On some JREs we see content bound to
                // field 0; on others, field 1.  So, here we have to
                // figure out which field number corresponds to
                // "content", and then set our expected file names below
                // accordingly:
                CompoundFileReader cfsReader  = new CompoundFileReader(dir, "_0.cfs");
                FieldInfos         fieldInfos = new FieldInfos(cfsReader, "_0.fnm");
                int contentFieldIndex         = -1;
                for (int i = 0; i < fieldInfos.Size(); i++)
                {
                    FieldInfo fi = fieldInfos.FieldInfo(i);
                    if (fi.name_ForNUnit.Equals("content"))
                    {
                        contentFieldIndex = i;
                        break;
                    }
                }
                cfsReader.Close();
                Assert.IsTrue(contentFieldIndex != -1,
                              "could not locate the 'content' field number in the _0.cfs segment");

                // Now verify file names:
                System.String[] expected;
                expected = new System.String[]
                { "_0.cfs", "_0_1.del", "_0_1.s" + contentFieldIndex, "segments_3", "segments.gen" };

                System.String[] actual = dir.ListAll();
                System.Array.Sort(expected);
                System.Array.Sort(actual);
                if (!CollectionsHelper.Equals(expected, actual))
                {
                    Assert.Fail("incorrect filenames in index: expected:\n    " + AsString(expected) +
                                "\n  actual:\n    " + AsString(actual));
                }
                dir.Close();
            }
            finally
            {
                RmDir(outputDir);
            }
        }
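For reference, the expected names above follow Lucene's _<segment>[_<generation>].<extension> scheme: _0.cfs is segment _0's compound file, _0_1.del is its generation-1 deletions file, _0_1.s<n> is the generation-1 separate norms file for field number n, and segments_3 together with segments.gen record the current commit point.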
Example 14
        /// <summary> </summary>
        /// <returns> The number of documents in all of the readers
        /// </returns>
        /// <throws>  CorruptIndexException if the index is corrupt </throws>
        /// <throws>  IOException if there is a low-level IO error </throws>
        private int MergeFields()
        {
            if (!mergeDocStores)
            {
                // When we are not merging by doc stores, their field
                // name -> number mappings are the same.  So, we start
                // with the fieldInfos of the last segment in this
                // case, to keep that numbering.
                SegmentReader sr = (SegmentReader)readers[readers.Count - 1];
                fieldInfos = (FieldInfos)sr.core.fieldInfos.Clone();
            }
            else
            {
                fieldInfos = new FieldInfos(); // merge field names
            }

            foreach (IndexReader reader in readers)
            {
                if (reader is SegmentReader)
                {
                    SegmentReader segmentReader       = (SegmentReader)reader;
                    FieldInfos    readerFieldInfos    = segmentReader.FieldInfos();
                    int           numReaderFieldInfos = readerFieldInfos.Size();
                    for (int j = 0; j < numReaderFieldInfos; j++)
                    {
                        FieldInfo fi = readerFieldInfos.FieldInfo(j);
                        fieldInfos.Add(fi.name, fi.isIndexed, fi.storeTermVector, fi.storePositionWithTermVector, fi.storeOffsetWithTermVector, !reader.HasNorms(fi.name), fi.storePayloads, fi.omitTermFreqAndPositions);
                    }
                }
                else
                {
                    AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.TERMVECTOR_WITH_POSITION_OFFSET), true, true, true, false, false);
                    AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.TERMVECTOR_WITH_POSITION), true, true, false, false, false);
                    AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.TERMVECTOR_WITH_OFFSET), true, false, true, false, false);
                    AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.TERMVECTOR), true, false, false, false, false);
                    AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.OMIT_TERM_FREQ_AND_POSITIONS), false, false, false, false, true);
                    AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.STORES_PAYLOADS), false, false, false, true, false);
                    AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.INDEXED), false, false, false, false, false);
                    fieldInfos.Add(reader.GetFieldNames(FieldOption.UNINDEXED), false);
                }
            }
            fieldInfos.Write(directory, segment + ".fnm");

            int docCount = 0;

            SetMatchingSegmentReaders();

            if (mergeDocStores)
            {
                // merge field values
                FieldsWriter fieldsWriter = new FieldsWriter(directory, segment, fieldInfos);

                try
                {
                    int idx = 0;
                    foreach (IndexReader reader in readers)
                    {
                        SegmentReader matchingSegmentReader = matchingSegmentReaders[idx++];
                        FieldsReader  matchingFieldsReader  = null;
                        if (matchingSegmentReader != null)
                        {
                            FieldsReader fieldsReader = matchingSegmentReader.GetFieldsReader();
                            if (fieldsReader != null && fieldsReader.CanReadRawDocs())
                            {
                                matchingFieldsReader = fieldsReader;
                            }
                        }
                        if (reader.HasDeletions)
                        {
                            docCount += CopyFieldsWithDeletions(fieldsWriter, reader, matchingFieldsReader);
                        }
                        else
                        {
                            docCount += CopyFieldsNoDeletions(fieldsWriter, reader, matchingFieldsReader);
                        }
                    }
                }
                finally
                {
                    fieldsWriter.Dispose();
                }

                System.String fileName      = segment + "." + IndexFileNames.FIELDS_INDEX_EXTENSION;
                long          fdxFileLength = directory.FileLength(fileName);

                if (4 + ((long)docCount) * 8 != fdxFileLength)
                {
                    // This is most likely a bug in Sun JRE 1.6.0_04/_05;
                    // we detect that the bug has struck, here, and
                    // throw an exception to prevent the corruption from
                    // entering the index.  See LUCENE-1282 for
                    // details.
                    throw new System.SystemException("mergeFields produced an invalid result: docCount is " + docCount + " but fdx file size is " + fdxFileLength + " file=" + fileName + " file exists?=" + directory.FileExists(fileName) + "; now aborting this merge to prevent index corruption");
                }
            }
            // If we are skipping the doc stores, that means there
            // are no deletions in any of these segments, so we
            // just sum numDocs() of each segment to get total docCount
            else
            {
                foreach (IndexReader reader in readers)
                {
                    docCount += reader.NumDocs();
                }
            }

            return docCount;
        }
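The arithmetic in the sanity check above: the .fdx stored-fields index consists of a 4-byte header followed by one 8-byte pointer per document, so for docCount documents the file must be exactly 4 + docCount * 8 bytes (for example, 4 + 35 * 8 = 284 bytes for 35 documents); any other length means the pointers, and thus the stored fields, are corrupt.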
Example 15
        private void  Initialize(SegmentInfo si, int readBufferSize, bool doOpenStores)
        {
            segment             = si.name;
            this.si             = si;
            this.readBufferSize = readBufferSize;

            bool success = false;

            try
            {
                // Use compound file directory for some files, if it exists
                Directory cfsDir = Directory();
                if (si.GetUseCompoundFile())
                {
                    cfsReader = new CompoundFileReader(Directory(), segment + "." + IndexFileNames.COMPOUND_FILE_EXTENSION, readBufferSize);
                    cfsDir    = cfsReader;
                }

                Directory storeDir;

                if (doOpenStores)
                {
                    if (si.GetDocStoreOffset() != -1)
                    {
                        if (si.GetDocStoreIsCompoundFile())
                        {
                            storeCFSReader = new CompoundFileReader(Directory(), si.GetDocStoreSegment() + "." + IndexFileNames.COMPOUND_FILE_STORE_EXTENSION, readBufferSize);
                            storeDir       = storeCFSReader;
                        }
                        else
                        {
                            storeDir = Directory();
                        }
                    }
                    else
                    {
                        storeDir = cfsDir;
                    }
                }
                else
                {
                    storeDir = null;
                }

                fieldInfos = new FieldInfos(cfsDir, segment + ".fnm");

                bool anyProx   = false;
                int  numFields = fieldInfos.Size();
                for (int i = 0; !anyProx && i < numFields; i++)
                {
                    if (!fieldInfos.FieldInfo(i).omitTf)
                    {
                        anyProx = true;
                    }
                }

                System.String fieldsSegment;

                if (si.GetDocStoreOffset() != -1)
                {
                    fieldsSegment = si.GetDocStoreSegment();
                }
                else
                {
                    fieldsSegment = segment;
                }

                if (doOpenStores)
                {
                    fieldsReader = new FieldsReader(storeDir, fieldsSegment, fieldInfos, readBufferSize, si.GetDocStoreOffset(), si.docCount);

                    // Verify two sources of "maxDoc" agree:
                    if (si.GetDocStoreOffset() == -1 && fieldsReader.Size() != si.docCount)
                    {
                        throw new CorruptIndexException("doc counts differ for segment " + si.name + ": fieldsReader shows " + fieldsReader.Size() + " but segmentInfo shows " + si.docCount);
                    }
                }

                tis = new TermInfosReader(cfsDir, segment, fieldInfos, readBufferSize);

                LoadDeletedDocs();

                // make sure that all index files have been read or are kept open
                // so that if an index update removes them we'll still have them
                freqStream = cfsDir.OpenInput(segment + ".frq", readBufferSize);
                if (anyProx)
                {
                    proxStream = cfsDir.OpenInput(segment + ".prx", readBufferSize);
                }
                OpenNorms(cfsDir, readBufferSize);

                if (doOpenStores && fieldInfos.HasVectors())
                {
                    // open term vector files only as needed
                    System.String vectorsSegment;
                    if (si.GetDocStoreOffset() != -1)
                    {
                        vectorsSegment = si.GetDocStoreSegment();
                    }
                    else
                    {
                        vectorsSegment = segment;
                    }
                    termVectorsReaderOrig = new TermVectorsReader(storeDir, vectorsSegment, fieldInfos, readBufferSize, si.GetDocStoreOffset(), si.docCount);
                }
                success = true;
            }
            finally
            {
                // With lock-less commits, it's entirely possible (and
                // fine) to hit a FileNotFound exception above.  In
                // this case, we want to explicitly close any subset
                // of things that were opened so that we don't have to
                // wait for a GC to do so.
                if (!success)
                {
                    DoClose();
                }
            }
        }
Example 16
		public virtual void  TestExactFileNames()
		{
			
			for (int pass = 0; pass < 2; pass++)
			{
				
				System.String outputDir = "lucene.backwardscompat0.index";
				RmDir(outputDir);
				
				try
				{
					Directory dir = FSDirectory.Open(new System.IO.FileInfo(FullDir(outputDir)));
					
					bool autoCommit = 0 == pass;
					
					IndexWriter writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true);
					writer.SetRAMBufferSizeMB(16.0);
					for (int i = 0; i < 35; i++)
					{
						AddDoc(writer, i);
					}
					Assert.AreEqual(35, writer.DocCount(), "wrong doc count");
					writer.Close();
					
					// Delete one doc so we get a .del file:
					IndexReader reader = IndexReader.Open(dir);
					Term searchTerm = new Term("id", "7");
					int delCount = reader.DeleteDocuments(searchTerm);
					Assert.AreEqual(1, delCount, "didn't delete the right number of documents");
					
					// Set one norm so we get a .s0 file:
					reader.SetNorm(21, "content", (float) 1.5);
					reader.Close();
					
					// The numbering of fields can vary depending on which
					// JRE is in use.  On some JREs we see content bound to
					// field 0; on others, field 1.  So, here we have to
					// figure out which field number corresponds to
					// "content", and then set our expected file names below
					// accordingly:
					CompoundFileReader cfsReader = new CompoundFileReader(dir, "_0.cfs");
					FieldInfos fieldInfos = new FieldInfos(cfsReader, "_0.fnm");
					int contentFieldIndex = -1;
					for (int i = 0; i < fieldInfos.Size(); i++)
					{
						FieldInfo fi = fieldInfos.FieldInfo(i);
						if (fi.name_ForNUnit.Equals("content"))
						{
							contentFieldIndex = i;
							break;
						}
					}
					cfsReader.Close();
					Assert.IsTrue(contentFieldIndex != -1, "could not locate the 'content' field number in the _0.cfs segment");
					
					// Now verify file names:
					System.String[] expected;
					expected = new System.String[]{"_0.cfs", "_0_1.del", "_0_1.s" + contentFieldIndex, "segments_3", "segments.gen"};
					
					System.String[] actual = dir.ListAll();
					System.Array.Sort(expected);
					System.Array.Sort(actual);
					if (!SupportClass.CollectionsHelper.Equals(expected, actual))
					{
						Assert.Fail("incorrect filenames in index: expected:\n    " + AsString(expected) + "\n  actual:\n    " + AsString(actual));
					}
					dir.Close();
				}
				finally
				{
					RmDir(outputDir);
				}
			}
		}
Example 17
 private void AssertReadOnly(FieldInfos readOnly, FieldInfos modifiable)
 {
     Assert.AreEqual(modifiable.Size(), readOnly.Size());
     // assert we can iterate
     foreach (FieldInfo fi in readOnly)
     {
         Assert.AreEqual(fi.Name, modifiable.FieldInfo(fi.Number).Name);
     }
 }
Example 18
        public static void CreatePostings()
        {
            TotalPostings = 0;
            TotalPayloadBytes = 0;
            Fields = new SortedDictionary<string, SortedDictionary<BytesRef, long>>();

            int numFields = TestUtil.NextInt(Random(), 1, 5);
            if (VERBOSE)
            {
                Console.WriteLine("TEST: " + numFields + " fields");
            }
            MaxDoc = 0;

            FieldInfo[] fieldInfoArray = new FieldInfo[numFields];
            int fieldUpto = 0;
            while (fieldUpto < numFields)
            {
                string field = TestUtil.RandomSimpleString(Random());
                if (Fields.ContainsKey(field))
                {
                    continue;
                }

                fieldInfoArray[fieldUpto] = new FieldInfo(field, true, fieldUpto, false, false, true, FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS, null, DocValuesType.NUMERIC, null);
                fieldUpto++;

                SortedDictionary<BytesRef, long> postings = new SortedDictionary<BytesRef, long>();
                Fields[field] = postings;
                HashSet<string> seenTerms = new HashSet<string>();

                int numTerms;
                if (Random().Next(10) == 7)
                {
                    numTerms = AtLeast(50);
                }
                else
                {
                    numTerms = TestUtil.NextInt(Random(), 2, 20);
                }

                for (int termUpto = 0; termUpto < numTerms; termUpto++)
                {
                    string term = TestUtil.RandomSimpleString(Random());
                    if (seenTerms.Contains(term))
                    {
                        continue;
                    }
                    seenTerms.Add(term);

                    if (TEST_NIGHTLY && termUpto == 0 && fieldUpto == 1)
                    {
                        // Make 1 big term:
                        term = "big_" + term;
                    }
                    else if (termUpto == 1 && fieldUpto == 1)
                    {
                        // Make 1 medium term:
                        term = "medium_" + term;
                    }
                    else if (Random().NextBoolean())
                    {
                        // Low freq term:
                        term = "low_" + term;
                    }
                    else
                    {
                        // Very low freq term (don't multiply by RANDOM_MULTIPLIER):
                        term = "verylow_" + term;
                    }

                    long termSeed = Random().NextLong();
                    postings[new BytesRef(term)] = termSeed;

                    // NOTE: sort of silly: we enum all the docs just to
                    // get the maxDoc
                    DocsEnum docsEnum = GetSeedPostings(term, termSeed, false, FieldInfo.IndexOptions.DOCS_ONLY);
                    int doc;
                    int lastDoc = 0;
                    while ((doc = docsEnum.NextDoc()) != DocsEnum.NO_MORE_DOCS)
                    {
                        lastDoc = doc;
                    }
                    MaxDoc = Math.Max(lastDoc, MaxDoc);
                }
            }

            FieldInfos = new FieldInfos(fieldInfoArray);

            // It's the count, not the last docID:
            MaxDoc++;

            GlobalLiveDocs = new FixedBitSet(MaxDoc);
            double liveRatio = Random().NextDouble();
            for (int i = 0; i < MaxDoc; i++)
            {
                if (Random().NextDouble() <= liveRatio)
                {
                    GlobalLiveDocs.Set(i);
                }
            }

            AllTerms = new List<FieldAndTerm>();
            foreach (KeyValuePair<string, SortedDictionary<BytesRef, long>> fieldEnt in Fields)
            {
                string field = fieldEnt.Key;
                foreach (KeyValuePair<BytesRef, long> termEnt in fieldEnt.Value.EntrySet())
                {
                    AllTerms.Add(new FieldAndTerm(field, termEnt.Key));
                }
            }

            if (VERBOSE)
            {
                Console.WriteLine("TEST: done init postings; " + AllTerms.Count + " total terms, across " + FieldInfos.Size() + " fields");
            }
        }
Example 19
        public virtual void  TestDeleteLeftoverFiles()
        {
            Directory dir = new RAMDirectory();

            IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);

            writer.SetMaxBufferedDocs(10);
            int i;

            for (i = 0; i < 35; i++)
            {
                AddDoc(writer, i);
            }
            writer.SetUseCompoundFile(false);
            for (; i < 45; i++)
            {
                AddDoc(writer, i);
            }
            writer.Close();

            // Delete one doc so we get a .del file:
            IndexReader reader     = IndexReader.Open(dir);
            Term        searchTerm = new Term("id", "7");
            int         delCount   = reader.DeleteDocuments(searchTerm);

            Assert.AreEqual(1, delCount, "didn't delete the right number of documents");

            // Set one norm so we get a .s0 file:
            reader.SetNorm(21, "content", (float)1.5);
            reader.Close();

            // Now, artificially create an extra .del file & extra
            // .s0 file:
            System.String[] files = dir.ListAll();

            /*
             * for(int j=0;j<files.length;j++) {
             * System.out.println(j + ": " + files[j]);
             * }
             */

            // The numbering of fields can vary depending on which
            // JRE is in use.  On some JREs we see content bound to
            // field 0; on others, field 1.  So, here we have to
            // figure out which field number corresponds to
            // "content", and then set our expected file names below
            // accordingly:
            CompoundFileReader cfsReader  = new CompoundFileReader(dir, "_2.cfs");
            FieldInfos         fieldInfos = new FieldInfos(cfsReader, "_2.fnm");
            int contentFieldIndex         = -1;

            for (i = 0; i < fieldInfos.Size(); i++)
            {
                FieldInfo fi = fieldInfos.FieldInfo(i);
                if (fi.name_ForNUnit.Equals("content"))
                {
                    contentFieldIndex = i;
                    break;
                }
            }
            cfsReader.Close();
            Assert.IsTrue(contentFieldIndex != -1, "could not locate the 'content' field number in the _2.cfs segment");

            System.String normSuffix = "s" + contentFieldIndex;

            // Create a bogus separate norms file for a
            // segment/field that actually has a separate norms file
            // already:
            CopyFile(dir, "_2_1." + normSuffix, "_2_2." + normSuffix);

            // Create a bogus separate norms file for a
            // segment/field that actually has a separate norms file
            // already, using the "not compound file" extension:
            CopyFile(dir, "_2_1." + normSuffix, "_2_2.f" + contentFieldIndex);

            // Create a bogus separate norms file for a
            // segment/field that does not have a separate norms
            // file already:
            CopyFile(dir, "_2_1." + normSuffix, "_1_1." + normSuffix);

            // Create a bogus separate norms file for a
            // segment/field that does not have a separate norms
            // file already using the "not compound file" extension:
            CopyFile(dir, "_2_1." + normSuffix, "_1_1.f" + contentFieldIndex);

            // Create a bogus separate del file for a
            // segment that already has a separate del file:
            CopyFile(dir, "_0_1.del", "_0_2.del");

            // Create a bogus separate del file for a
            // segment that does not yet have a separate del file:
            CopyFile(dir, "_0_1.del", "_1_1.del");

            // Create a bogus separate del file for a
            // non-existent segment:
            CopyFile(dir, "_0_1.del", "_188_1.del");

            // Create a bogus segment file:
            CopyFile(dir, "_0.cfs", "_188.cfs");

            // Create a bogus fnm file when the CFS already exists:
            CopyFile(dir, "_0.cfs", "_0.fnm");

            // Create a deletable file:
            CopyFile(dir, "_0.cfs", "deletable");

            // Create some old segments file:
            CopyFile(dir, "segments_3", "segments");
            CopyFile(dir, "segments_3", "segments_2");

            // Create a bogus cfs file shadowing a non-cfs segment:
            CopyFile(dir, "_2.cfs", "_3.cfs");

            System.String[] filesPre = dir.ListAll();

            // Open & close a writer: it should delete the bogus files
            // created above and nothing more:
            writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false, IndexWriter.MaxFieldLength.LIMITED);
            writer.Close();

            System.String[] files2 = dir.ListAll();
            dir.Close();

            System.Array.Sort(files);
            System.Array.Sort(files2);

            System.Collections.Hashtable dif = DifFiles(files, files2);

            if (!SupportClass.CollectionsHelper.Equals(files, files2))
            {
                Assert.Fail("IndexFileDeleter failed to delete unreferenced extra files: should have deleted " + (filesPre.Length - files.Length) + " files but only deleted " + (filesPre.Length - files2.Length) + "; expected files:\n    " + AsString(files) + "\n  actual files:\n    " + AsString(files2) + "\ndif: " + SupportClass.CollectionsHelper.CollectionToString(dif));
            }
        }
Example 20
        public TermVectorsWriter(Directory directory, System.String segment, FieldInfos fieldInfos)
        {
            // Open files for TermVector storage
            tvx = directory.CreateOutput(segment + TVX_EXTENSION);
            tvx.WriteInt(FORMAT_VERSION);
            tvd = directory.CreateOutput(segment + TVD_EXTENSION);
            tvd.WriteInt(FORMAT_VERSION);
            tvf = directory.CreateOutput(segment + TVF_EXTENSION);
            tvf.WriteInt(FORMAT_VERSION);

            this.fieldInfos = fieldInfos;
            fields = System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(fieldInfos.Size()));
            terms = System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(10));
        }
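A hedged usage sketch for the writer above; the call sequence reflects the old TermVectorsWriter API as remembered, so the exact member names are assumptions and may differ between versions:

        // Assumed API: open a document, write one field's terms, close it.
        TermVectorsWriter writer = new TermVectorsWriter(directory, segment, fieldInfos);
        writer.OpenDocument();
        writer.OpenField("contents");   // hypothetical field name
        writer.AddTerm("lucene", 2);    // term text and its in-document frequency
        writer.CloseDocument();
        writer.Close();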
Example 21
        internal virtual SegmentReader ReopenSegment(SegmentInfo si)
        {
            lock (this)
            {
                bool deletionsUpToDate = (this.si.HasDeletions() == si.HasDeletions()) && (!si.HasDeletions() || this.si.GetDelFileName().Equals(si.GetDelFileName()));
                bool normsUpToDate     = true;


                bool[] fieldNormsChanged = new bool[fieldInfos.Size()];
                if (normsUpToDate)
                {
                    for (int i = 0; i < fieldInfos.Size(); i++)
                    {
                        if (!this.si.GetNormFileName(i).Equals(si.GetNormFileName(i)))
                        {
                            normsUpToDate        = false;
                            fieldNormsChanged[i] = true;
                        }
                    }
                }

                if (normsUpToDate && deletionsUpToDate)
                {
                    return this;
                }


                // clone reader
                SegmentReader clone;
                if (readOnly)
                {
                    clone = new ReadOnlySegmentReader();
                }
                else
                {
                    clone = new SegmentReader();
                }

                bool success = false;
                try
                {
                    clone.readOnly       = readOnly;
                    clone.directory      = directory;
                    clone.si             = si;
                    clone.segment        = segment;
                    clone.readBufferSize = readBufferSize;
                    clone.cfsReader      = cfsReader;
                    clone.storeCFSReader = storeCFSReader;

                    clone.fieldInfos            = fieldInfos;
                    clone.tis                   = tis;
                    clone.freqStream            = freqStream;
                    clone.proxStream            = proxStream;
                    clone.termVectorsReaderOrig = termVectorsReaderOrig;


                    // we have to open a new FieldsReader, because it is not thread-safe
                    // and can thus not be shared among multiple SegmentReaders
                    // TODO: Change this in case FieldsReader becomes thread-safe in the future
                    System.String fieldsSegment;

                    Directory storeDir = Directory();

                    if (si.GetDocStoreOffset() != -1)
                    {
                        fieldsSegment = si.GetDocStoreSegment();
                        if (storeCFSReader != null)
                        {
                            storeDir = storeCFSReader;
                        }
                    }
                    else
                    {
                        fieldsSegment = segment;
                        if (cfsReader != null)
                        {
                            storeDir = cfsReader;
                        }
                    }

                    if (fieldsReader != null)
                    {
                        clone.fieldsReader = new FieldsReader(storeDir, fieldsSegment, fieldInfos, readBufferSize, si.GetDocStoreOffset(), si.docCount);
                    }


                    if (!deletionsUpToDate)
                    {
                        // load deleted docs
                        clone.deletedDocs = null;
                        clone.LoadDeletedDocs();
                    }
                    else
                    {
                        clone.deletedDocs = this.deletedDocs;
                    }

                    clone.norms = new System.Collections.Hashtable();
                    if (!normsUpToDate)
                    {
                        // load norms
                        for (int i = 0; i < fieldNormsChanged.Length; i++)
                        {
                            // copy unchanged norms to the cloned reader and incRef those norms
                            if (!fieldNormsChanged[i])
                            {
                                System.String curField = fieldInfos.FieldInfo(i).name;
                                Norm          norm     = (Norm)this.norms[curField];
                                norm.IncRef();
                                clone.norms[curField] = norm;
                            }
                        }

                        clone.OpenNorms(si.GetUseCompoundFile() ? cfsReader : Directory(), readBufferSize);
                    }
                    else
                    {
                        System.Collections.IEnumerator it = norms.Keys.GetEnumerator();
                        while (it.MoveNext())
                        {
                            System.String field = (System.String)it.Current;
                            Norm          norm  = (Norm)norms[field];
                            norm.IncRef();
                            clone.norms[field] = norm;
                        }
                    }

                    if (clone.singleNormStream == null)
                    {
                        for (int i = 0; i < fieldInfos.Size(); i++)
                        {
                            FieldInfo fi = fieldInfos.FieldInfo(i);
                            if (fi.isIndexed && !fi.omitNorms)
                            {
                                Directory     d        = si.GetUseCompoundFile() ? cfsReader : Directory();
                                System.String fileName = si.GetNormFileName(fi.number);
                                if (si.HasSeparateNorms(fi.number))
                                {
                                    continue;
                                }

                                if (fileName.EndsWith("." + IndexFileNames.NORMS_EXTENSION))
                                {
                                    clone.singleNormStream = d.OpenInput(fileName, readBufferSize);
                                    break;
                                }
                            }
                        }
                    }

                    success = true;
                }
                finally
                {
                    if (this.referencedSegmentReader != null)
                    {
                        // this reader shares resources with another SegmentReader,
                        // so we increment the other readers refCount. We don't
                        // increment the refCount of the norms because we did
                        // that already for the shared norms
                        clone.referencedSegmentReader = this.referencedSegmentReader;
                        referencedSegmentReader.IncRefReaderNotNorms();
                    }
                    else
                    {
                        // this reader wasn't reopened, so we increment this
                        // readers refCount
                        clone.referencedSegmentReader = this;
                        IncRefReaderNotNorms();
                    }

                    if (!success)
                    {
                        // An exception occurred during reopen; we have to decRef the norms
                        // that we incRef'ed already and close singleNormsStream and FieldsReader
                        clone.DecRef();
                    }
                }

                return clone;
            }
        }
Example 22
        /// <summary> </summary>
        /// <returns> The number of documents in all of the readers
        /// </returns>
        /// <throws>  CorruptIndexException if the index is corrupt </throws>
        /// <throws>  IOException if there is a low-level IO error </throws>
        private int MergeFields()
        {
            if (!mergeDocStores)
            {
                // When we are not merging by doc stores, that means
                // all segments were written as part of a single
                // autoCommit=false IndexWriter session, so their field
                // name -> number mappings are the same.  So, we start
                // with the fieldInfos of the last segment in this
                // case, to keep that numbering.
                SegmentReader sr = (SegmentReader)readers[readers.Count - 1];
                fieldInfos = (FieldInfos)sr.fieldInfos.Clone();
            }
            else
            {
                fieldInfos = new FieldInfos();                 // merge field names
            }

            for (int i = 0; i < readers.Count; i++)
            {
                IndexReader reader = (IndexReader)readers[i];
                if (reader is SegmentReader)
                {
                    SegmentReader segmentReader = (SegmentReader)reader;
                    for (int j = 0; j < segmentReader.GetFieldInfos().Size(); j++)
                    {
                        FieldInfo fi = segmentReader.GetFieldInfos().FieldInfo(j);
                        fieldInfos.Add(fi.name, fi.isIndexed, fi.storeTermVector, fi.storePositionWithTermVector, fi.storeOffsetWithTermVector, !reader.HasNorms(fi.name), fi.storePayloads);
                    }
                }
                else
                {
                    AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET), true, true, true, false);
                    AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION), true, true, false, false);
                    AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET), true, false, true, false);
                    AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR), true, false, false, false);
                    AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.STORES_PAYLOADS), false, false, false, true);
                    AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.INDEXED), false, false, false, false);
                    fieldInfos.Add(reader.GetFieldNames(IndexReader.FieldOption.UNINDEXED), false);
                }
            }
            fieldInfos.Write(directory, segment + ".fnm");

            int docCount = 0;

            if (mergeDocStores)
            {
                // If the i'th reader is a SegmentReader and has
                // identical fieldName -> number mapping, then this
                // array will be non-null at position i:
                SegmentReader[] matchingSegmentReaders = new SegmentReader[readers.Count];

                // If this reader is a SegmentReader, and all of its
                // field name -> number mappings match the "merged"
                // FieldInfos, then we can do a bulk copy of the
                // stored fields:
                for (int i = 0; i < readers.Count; i++)
                {
                    IndexReader reader = (IndexReader)readers[i];
                    if (reader is SegmentReader)
                    {
                        SegmentReader segmentReader     = (SegmentReader)reader;
                        bool          same              = true;
                        FieldInfos    segmentFieldInfos = segmentReader.GetFieldInfos();
                        for (int j = 0; same && j < segmentFieldInfos.Size(); j++)
                        {
                            same = fieldInfos.FieldName(j).Equals(segmentFieldInfos.FieldName(j));
                        }
                        if (same)
                        {
                            matchingSegmentReaders[i] = segmentReader;
                        }
                    }
                }

                // Used for bulk-reading raw bytes for stored fields
                int[] rawDocLengths = new int[MAX_RAW_MERGE_DOCS];

                // for merging we don't want to compress/uncompress the data, so to tell the FieldsReader that we're
                // in merge mode, we use this FieldSelector
                FieldSelector fieldSelectorMerge = new AnonymousClassFieldSelector(this);

                // merge field values
                FieldsWriter fieldsWriter = new FieldsWriter(directory, segment, fieldInfos);

                try
                {
                    for (int i = 0; i < readers.Count; i++)
                    {
                        IndexReader   reader = (IndexReader)readers[i];
                        SegmentReader matchingSegmentReader = matchingSegmentReaders[i];
                        FieldsReader  matchingFieldsReader;
                        if (matchingSegmentReader != null)
                        {
                            matchingFieldsReader = matchingSegmentReader.GetFieldsReader();
                        }
                        else
                        {
                            matchingFieldsReader = null;
                        }
                        int maxDoc = reader.MaxDoc();
                        for (int j = 0; j < maxDoc;)
                        {
                            if (!reader.IsDeleted(j))
                            {
                                // skip deleted docs
                                if (matchingSegmentReader != null)
                                {
                                    // We can optimize this case (doing a bulk
                                    // byte copy) since the field numbers are
                                    // identical
                                    int start   = j;
                                    int numDocs = 0;
                                    do
                                    {
                                        j++;
                                        numDocs++;
                                    } while (j < maxDoc && !matchingSegmentReader.IsDeleted(j) && numDocs < MAX_RAW_MERGE_DOCS);

                                    IndexInput stream = matchingFieldsReader.RawDocs(rawDocLengths, start, numDocs);
                                    fieldsWriter.AddRawDocuments(stream, rawDocLengths, numDocs);
                                    docCount += numDocs;
                                    if (checkAbort != null)
                                    {
                                        checkAbort.Work(300 * numDocs);
                                    }
                                }
                                else
                                {
                                    fieldsWriter.AddDocument(reader.Document(j, fieldSelectorMerge));
                                    j++;
                                    docCount++;
                                    if (checkAbort != null)
                                    {
                                        checkAbort.Work(300);
                                    }
                                }
                            }
                            else
                            {
                                j++;
                            }
                        }
                    }
                }
                finally
                {
                    fieldsWriter.Close();
                }
            }
            else
            {
                // If we are skipping the doc stores, that means there
                // are no deletions in any of these segments, so we
                // just sum NumDocs() of each segment to get the total docCount
                for (int i = 0; i < readers.Count; i++)
                {
                    docCount += ((IndexReader)readers[i]).NumDocs();
                }
            }

            return(docCount);
        }
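
The bulk-copy fast path above is only legal when a source segment's field-number-to-name mapping agrees with the merged FieldInfos. As a standalone restatement of that parity check, here is a hypothetical helper (not part of SegmentMerger) built from the same Size() and FieldName(int) calls the loop uses:

        // Returns true when every field number in the source segment resolves
        // to the same field name in the merged FieldInfos; only then can the
        // stored fields be copied as raw bytes without renumbering.
        private static bool HasMatchingFieldMapping(FieldInfos merged, FieldInfos segmentFieldInfos)
        {
            for (int j = 0; j < segmentFieldInfos.Size(); j++)
            {
                if (!merged.FieldName(j).Equals(segmentFieldInfos.FieldName(j)))
                {
                    return false;
                }
            }
            return true;
        }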
Example No. 23
        public TermVectorsWriter(Directory directory, System.String segment, FieldInfos fieldInfos)
        {
            // Open files for TermVector storage
            tvx = directory.CreateOutput(segment + TVX_EXTENSION);
            tvx.WriteInt(FORMAT_VERSION);
            tvd = directory.CreateOutput(segment + TVD_EXTENSION);
            tvd.WriteInt(FORMAT_VERSION);
            tvf = directory.CreateOutput(segment + TVF_EXTENSION);
            tvf.WriteInt(FORMAT_VERSION);

            this.fieldInfos = fieldInfos;
            fields          = System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(fieldInfos.Size()));
            terms           = System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(10));
        }
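
Since the constructor eagerly creates and stamps the tvx, tvd and tvf outputs, a caller is expected to pair construction with Close() in a finally block. A hedged usage sketch, assuming the Close() method from the rest of the class (not shown in this excerpt):

        TermVectorsWriter tvWriter = new TermVectorsWriter(directory, segment, fieldInfos);
        try
        {
            // per-document term vector writes go here in the full class
        }
        finally
        {
            tvWriter.Close(); // assumed to flush and close tvx, tvd and tvf
        }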
Example No. 24
        /// <summary>Produce _X.nrm if any document had a field with norms
        /// not disabled
        /// </summary>
        internal override void flush(IDictionary<object, ICollection<object>> threadsAndFields, DocumentsWriter.FlushState state)
        {
            IDictionary<object, object> byField = new Dictionary<object, object>();

            // Typically, each thread will have encountered the same
            // field.  So first we collate by field, ie, all
            // per-thread field instances that correspond to the
            // same FieldInfo
            IEnumerator<KeyValuePair<object, ICollection<object>>> it = threadsAndFields.GetEnumerator();

            while (it.MoveNext())
            {
                KeyValuePair<object, ICollection<object>> entry = it.Current;

                ICollection<object> fields         = entry.Value;
                IEnumerator<object> fieldsIt       = fields.GetEnumerator();
                List<object>        fieldsToRemove = new List<object>(fields.Count);

                while (fieldsIt.MoveNext())
                {
                    NormsWriterPerField perField = (NormsWriterPerField)fieldsIt.Current;

                    if (perField.upto > 0)
                    {
                        // It has some norms
                        IList<object> l;
                        if (byField.ContainsKey(perField.fieldInfo))
                        {
                            l = (IList<object>)byField[perField.fieldInfo];
                        }
                        else
                        {
                            l = new List<object>();
                            byField[perField.fieldInfo] = l;
                        }
                        l.Add(perField);
                    }
                    else
                    {
                        // Mark this field for removal since we haven't seen it
                        // since the previous flush; it can't be removed here
                        // because the collection is still being enumerated
                        fieldsToRemove.Add(perField);
                    }
                }
                for (int i = 0; i < fieldsToRemove.Count; i++)
                {
                    fields.Remove(fieldsToRemove[i]);
                }
            }

            string normsFileName = state.segmentName + "." + IndexFileNames.NORMS_EXTENSION;

            state.flushedFiles[normsFileName] = normsFileName;
            IndexOutput normsOut = state.directory.CreateOutput(normsFileName);

            try
            {
                normsOut.WriteBytes(SegmentMerger.NORMS_HEADER, 0, SegmentMerger.NORMS_HEADER.Length);

                int numField = fieldInfos.Size();

                int normCount = 0;

                for (int fieldNumber = 0; fieldNumber < numField; fieldNumber++)
                {
                    FieldInfo fieldInfo = fieldInfos.FieldInfo(fieldNumber);

                    List<object> toMerge;
                    int          upto = 0;
                    if (byField.ContainsKey(fieldInfo))
                    {
                        toMerge = (List<object>)byField[fieldInfo];

                        int numFields = toMerge.Count;

                        normCount++;

                        NormsWriterPerField[] fields = new NormsWriterPerField[numFields];
                        int[] uptos = new int[numFields];

                        for (int j = 0; j < numFields; j++)
                        {
                            fields[j] = (NormsWriterPerField)toMerge[j];
                        }

                        int numLeft = numFields;

                        while (numLeft > 0)
                        {
                            System.Diagnostics.Debug.Assert(uptos[0] < fields[0].docIDs.Length, " uptos[0]=" + uptos[0] + " len=" + (fields[0].docIDs.Length));

                            int minLoc   = 0;
                            int minDocID = fields[0].docIDs[uptos[0]];

                            for (int j = 1; j < numLeft; j++)
                            {
                                int docID = fields[j].docIDs[uptos[j]];
                                if (docID < minDocID)
                                {
                                    minDocID = docID;
                                    minLoc   = j;
                                }
                            }

                            System.Diagnostics.Debug.Assert(minDocID < state.numDocsInRAM);

                            // Fill hole
                            for (; upto < minDocID; upto++)
                            {
                                normsOut.WriteByte(defaultNorm);
                            }

                            normsOut.WriteByte(fields[minLoc].norms[uptos[minLoc]]);
                            (uptos[minLoc])++;
                            upto++;

                            if (uptos[minLoc] == fields[minLoc].upto)
                            {
                                fields[minLoc].reset();
                                if (minLoc != numLeft - 1)
                                {
                                    fields[minLoc] = fields[numLeft - 1];
                                    uptos[minLoc]  = uptos[numLeft - 1];
                                }
                                numLeft--;
                            }
                        }

                        // Fill final hole with defaultNorm
                        for (; upto < state.numDocsInRAM; upto++)
                        {
                            normsOut.WriteByte(defaultNorm);
                        }
                    }
                    else if (fieldInfo.isIndexed && !fieldInfo.omitNorms)
                    {
                        normCount++;
                        // Fill entire field with default norm:
                        for (; upto < state.numDocsInRAM; upto++)
                        {
                            normsOut.WriteByte(defaultNorm);
                        }
                    }

                    System.Diagnostics.Debug.Assert(4 + normCount * state.numDocsInRAM == normsOut.GetFilePointer(), ".nrm file size mismatch: expected=" + (4 + normCount * state.numDocsInRAM) + " actual=" + normsOut.GetFilePointer());
                }
            }
            finally
            {
                normsOut.Close();
            }
        }
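
The while (numLeft > 0) loop above is a k-way merge: it repeatedly picks the per-thread list with the smallest pending docID, writes that norm byte, and fills any docID gaps with defaultNorm. A self-contained sketch of the byte sequence it produces for one field, with illustrative names rather than Lucene.NET API (assumes using System.Collections.Generic;):

        // Equivalent end result of the streaming merge, stated directly:
        // pre-fill with defaultNorm, then overlay each thread's sorted
        // (docID, norm) pairs.
        internal static byte[] OverlayNorms(List<KeyValuePair<int, byte>[]> perThreadPairs, int numDocs, byte defaultNorm)
        {
            byte[] norms = new byte[numDocs];
            for (int i = 0; i < numDocs; i++)
            {
                norms[i] = defaultNorm;
            }
            foreach (KeyValuePair<int, byte>[] pairs in perThreadPairs)
            {
                foreach (KeyValuePair<int, byte> pair in pairs)
                {
                    norms[pair.Key] = pair.Value; // Key = docID, Value = norm byte
                }
            }
            return norms;
        }

The real loop streams the bytes straight to the .nrm output in docID order instead of materializing an array, which keeps memory flat during a flush; the sketch only pins down what the resulting bytes must be.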
Example No. 25
        /// <summary>Produce _X.nrm if any document had a field with norms
        /// not disabled
        /// </summary>
        public override void  Flush(System.Collections.IDictionary threadsAndFields, SegmentWriteState state)
        {
            System.Collections.IDictionary byField = new System.Collections.Hashtable();

            // Typically, each thread will have encountered the same
            // field.  So first we collate by field, ie, all
            // per-thread field instances that correspond to the
            // same FieldInfo
            System.Collections.IEnumerator it = new System.Collections.Hashtable(threadsAndFields).GetEnumerator();
            while (it.MoveNext())
            {
                System.Collections.DictionaryEntry entry = (System.Collections.DictionaryEntry)it.Current;

                System.Collections.ICollection fields         = (System.Collections.ICollection)entry.Value;
                System.Collections.IEnumerator fieldsIt       = fields.GetEnumerator();
                System.Collections.ArrayList   fieldsToRemove = new System.Collections.ArrayList();

                while (fieldsIt.MoveNext())
                {
                    NormsWriterPerField perField = (NormsWriterPerField)((System.Collections.DictionaryEntry)fieldsIt.Current).Key;

                    if (perField.upto > 0)
                    {
                        // It has some norms
                        System.Collections.IList l = (System.Collections.IList)byField[perField.fieldInfo];
                        if (l == null)
                        {
                            l = new System.Collections.ArrayList();
                            byField[perField.fieldInfo] = l;
                        }
                        l.Add(perField);
                    }
                    else
                    {
                        // Remove this field since we haven't seen it
                        // since the previous flush
                        fieldsToRemove.Add(perField);
                    }
                }

                System.Collections.Hashtable fieldsHT = (System.Collections.Hashtable)fields;
                for (int i = 0; i < fieldsToRemove.Count; i++)
                {
                    fieldsHT.Remove(fieldsToRemove[i]);
                }
            }

            System.String normsFileName = state.segmentName + "." + IndexFileNames.NORMS_EXTENSION;
            state.flushedFiles[normsFileName] = normsFileName;
            IndexOutput normsOut = state.directory.CreateOutput(normsFileName);

            try
            {
                normsOut.WriteBytes(SegmentMerger.NORMS_HEADER, 0, SegmentMerger.NORMS_HEADER.Length);

                int numField = fieldInfos.Size();

                int normCount = 0;

                for (int fieldNumber = 0; fieldNumber < numField; fieldNumber++)
                {
                    FieldInfo fieldInfo = fieldInfos.FieldInfo(fieldNumber);

                    System.Collections.IList toMerge = (System.Collections.IList)byField[fieldInfo];
                    int upto = 0;
                    if (toMerge != null)
                    {
                        int numFields = toMerge.Count;

                        normCount++;

                        NormsWriterPerField[] fields = new NormsWriterPerField[numFields];
                        int[] uptos = new int[numFields];

                        for (int j = 0; j < numFields; j++)
                        {
                            fields[j] = (NormsWriterPerField)toMerge[j];
                        }

                        int numLeft = numFields;

                        while (numLeft > 0)
                        {
                            System.Diagnostics.Debug.Assert(uptos[0] < fields[0].docIDs.Length, " uptos[0]=" + uptos[0] + " len=" + (fields[0].docIDs.Length));

                            int minLoc   = 0;
                            int minDocID = fields[0].docIDs[uptos[0]];

                            for (int j = 1; j < numLeft; j++)
                            {
                                int docID = fields[j].docIDs[uptos[j]];
                                if (docID < minDocID)
                                {
                                    minDocID = docID;
                                    minLoc   = j;
                                }
                            }

                            System.Diagnostics.Debug.Assert(minDocID < state.numDocs);

                            // Fill hole
                            for (; upto < minDocID; upto++)
                            {
                                normsOut.WriteByte(defaultNorm);
                            }

                            normsOut.WriteByte(fields[minLoc].norms[uptos[minLoc]]);
                            (uptos[minLoc])++;
                            upto++;

                            if (uptos[minLoc] == fields[minLoc].upto)
                            {
                                fields[minLoc].Reset();
                                if (minLoc != numLeft - 1)
                                {
                                    fields[minLoc] = fields[numLeft - 1];
                                    uptos[minLoc]  = uptos[numLeft - 1];
                                }
                                numLeft--;
                            }
                        }

                        // Fill final hole with defaultNorm
                        for (; upto < state.numDocs; upto++)
                        {
                            normsOut.WriteByte(defaultNorm);
                        }
                    }
                    else if (fieldInfo.isIndexed && !fieldInfo.omitNorms)
                    {
                        normCount++;
                        // Fill entire field with default norm:
                        for (; upto < state.numDocs; upto++)
                        {
                            normsOut.WriteByte(defaultNorm);
                        }
                    }

                    System.Diagnostics.Debug.Assert(4 + normCount * state.numDocs == normsOut.GetFilePointer(), ".nrm file size mismatch: expected=" + (4 + normCount * state.numDocs) + " actual=" + normsOut.GetFilePointer());
                }
            }
            finally
            {
                normsOut.Close();
            }
        }
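Example No. 26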
		public virtual void  TestDeleteLeftoverFiles()
		{
			
			Directory dir = new RAMDirectory();
			
			IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
			writer.SetMaxBufferedDocs(10);
			int i;
			for (i = 0; i < 35; i++)
			{
				AddDoc(writer, i);
			}
			writer.SetUseCompoundFile(false);
			for (; i < 45; i++)
			{
				AddDoc(writer, i);
			}
			writer.Close();
			
			// Delete one doc so we get a .del file:
			IndexReader reader = IndexReader.Open(dir);
			Term searchTerm = new Term("id", "7");
			int delCount = reader.DeleteDocuments(searchTerm);
			Assert.AreEqual(1, delCount, "didn't delete the right number of documents");
			
			// Set one norm so we get a .s0 file:
			reader.SetNorm(21, "content", (float) 1.5);
			reader.Close();
			
			// Now, artificially create an extra .del file & extra
			// .s0 file:
			System.String[] files = dir.List();
			
			/*
			for (i = 0; i < files.Length; i++) {
			System.Console.Out.WriteLine(i + ": " + files[i]);
			}
			*/
			
			// The numbering of fields can vary depending on which
			// JRE is in use.  On some JREs we see content bound to
			// field 0; on others, field 1.  So, here we have to
			// figure out which field number corresponds to
			// "content", and then set our expected file names below
			// accordingly:
			CompoundFileReader cfsReader = new CompoundFileReader(dir, "_2.cfs");
			FieldInfos fieldInfos = new FieldInfos(cfsReader, "_2.fnm");
			int contentFieldIndex = -1;
			for (i = 0; i < fieldInfos.Size(); i++)
			{
				FieldInfo fi = fieldInfos.FieldInfo(i);
				if (fi.Name_ForNUnitTest.Equals("content"))
				{
					contentFieldIndex = i;
					break;
				}
			}
			cfsReader.Close();
			Assert.IsTrue(contentFieldIndex != -1, "could not locate the 'content' field number in the _2.cfs segment");
			
			System.String normSuffix = "s" + contentFieldIndex;
			
			// Create a bogus separate norms file for a
			// segment/field that actually has a separate norms file
			// already:
			CopyFile(dir, "_2_1." + normSuffix, "_2_2." + normSuffix);
			
			// Create a bogus separate norms file for a
			// segment/field that actually has a separate norms file
			// already, using the "not compound file" extension:
			CopyFile(dir, "_2_1." + normSuffix, "_2_2.f" + contentFieldIndex);
			
			// Create a bogus separate norms file for a
			// segment/field that does not have a separate norms
			// file already:
			CopyFile(dir, "_2_1." + normSuffix, "_1_1." + normSuffix);
			
			// Create a bogus separate norms file for a
			// segment/field that does not have a separate norms
			// file already using the "not compound file" extension:
			CopyFile(dir, "_2_1." + normSuffix, "_1_1.f" + contentFieldIndex);
			
			// Create a bogus separate del file for a
			// segment that already has a separate del file: 
			CopyFile(dir, "_0_1.del", "_0_2.del");
			
			// Create a bogus separate del file for a
			// segment that does not yet have a separate del file:
			CopyFile(dir, "_0_1.del", "_1_1.del");
			
			// Create a bogus separate del file for a
			// non-existent segment:
			CopyFile(dir, "_0_1.del", "_188_1.del");
			
			// Create a bogus segment file:
			CopyFile(dir, "_0.cfs", "_188.cfs");
			
			// Create a bogus fnm file when the CFS already exists:
			CopyFile(dir, "_0.cfs", "_0.fnm");
			
			// Create a deletable file:
			CopyFile(dir, "_0.cfs", "deletable");
			
			// Create some old segments file:
			CopyFile(dir, "segments_a", "segments");
			CopyFile(dir, "segments_a", "segments_2");
			
			// Create a bogus cfs file shadowing a non-cfs segment:
			CopyFile(dir, "_2.cfs", "_3.cfs");
			
			System.String[] filesPre = dir.List();
			
			// Open & close a writer: it should delete the above
			// bogus files and nothing more:
			writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false);
			writer.Close();
			
			System.String[] files2 = dir.List();
			dir.Close();
			
			System.Array.Sort(files);
			System.Array.Sort(files2);
			
			if (!ArrayEquals(files, files2))
			{
				Assert.Fail("IndexFileDeleter failed to delete unreferenced extra files: should have deleted " + (filesPre.Length - files.Length) + " files but only deleted " + (filesPre.Length - files2.Length) + "; expected files:\n    " + AsString(files) + "\n  actual files:\n    " + AsString(files2));
			}
		}
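
The test relies on helpers that are not shown in this excerpt (AddDoc, CopyFile, AsString, ArrayEquals). As one plausible sketch, CopyFile can be written with nothing but the core Directory stream APIs; the buffer size here is an arbitrary choice:

		private void CopyFile(Directory dir, System.String src, System.String dest)
		{
			// Byte-for-byte copy of an existing Directory file to a new name,
			// used above to fabricate the bogus index files.
			IndexInput input = dir.OpenInput(src);
			IndexOutput output = dir.CreateOutput(dest);
			byte[] buffer = new byte[1024];
			long remainder = input.Length();
			while (remainder > 0)
			{
				int len = (int) System.Math.Min(buffer.Length, remainder);
				input.ReadBytes(buffer, 0, len);
				output.WriteBytes(buffer, len);
				remainder -= len;
			}
			input.Close();
			output.Close();
		}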
Example No. 27
		public void  AddDocument(System.String segment, Document doc)
		{
			// write field names
			fieldInfos = new FieldInfos();
			fieldInfos.Add(doc);
			fieldInfos.Write(directory, segment + ".fnm");
			
			// write field values
			FieldsWriter fieldsWriter = new FieldsWriter(directory, segment, fieldInfos);
			try
			{
				fieldsWriter.AddDocument(doc);
			}
			finally
			{
				fieldsWriter.Close();
			}
			
			// invert doc into postingTable
			postingTable.Clear(); // clear postingTable
			fieldLengths = new int[fieldInfos.Size()]; // init fieldLengths
			fieldPositions = new int[fieldInfos.Size()]; // init fieldPositions
			fieldOffsets = new int[fieldInfos.Size()]; // init fieldOffsets
			
			fieldBoosts = new float[fieldInfos.Size()]; // init fieldBoosts
			float boost = doc.GetBoost();
			for (int i = 0; i < fieldBoosts.Length; i++)
			{
				fieldBoosts[i] = boost;
			}
			
			InvertDocument(doc);
			
			// sort postingTable into an array
			Posting[] postings = SortPostingTable();
			
			/*
			for (int i = 0; i < postings.Length; i++) {
			Posting posting = postings[i];
			System.Console.Out.Write(posting.term);
			System.Console.Out.Write(" freq=" + posting.freq);
			System.Console.Out.Write(" pos=");
			System.Console.Out.Write(posting.positions[0]);
			for (int j = 1; j < posting.freq; j++)
			System.Console.Out.Write("," + posting.positions[j]);
			System.Console.Out.WriteLine("");
			}
			*/
			
			// write postings
			WritePostings(postings, segment);
			
			// write norms of indexed fields
			WriteNorms(segment);
		}
Example No. 28
        private void Initialize(SegmentInfo si, int readBufferSize, bool doOpenStores)
        {
            segment = si.name;
            this.si = si;
            this.readBufferSize = readBufferSize;

            bool success = false;

            try
            {
                // Use compound file directory for some files, if it exists
                Directory cfsDir = Directory();
                if (si.GetUseCompoundFile())
                {
                    cfsReader = new CompoundFileReader(Directory(), segment + "." + IndexFileNames.COMPOUND_FILE_EXTENSION, readBufferSize);
                    cfsDir = cfsReader;
                }

                Directory storeDir;

                if (doOpenStores)
                {
                    if (si.GetDocStoreOffset() != -1)
                    {
                        if (si.GetDocStoreIsCompoundFile())
                        {
                            storeCFSReader = new CompoundFileReader(Directory(), si.GetDocStoreSegment() + "." + IndexFileNames.COMPOUND_FILE_STORE_EXTENSION, readBufferSize);
                            storeDir = storeCFSReader;
                        }
                        else
                        {
                            storeDir = Directory();
                        }
                    }
                    else
                    {
                        storeDir = cfsDir;
                    }
                }
                else
                    storeDir = null;

                fieldInfos = new FieldInfos(cfsDir, segment + ".fnm");

                bool anyProx = false;
                int numFields = fieldInfos.Size();
                for (int i = 0; !anyProx && i < numFields; i++)
                    if (!fieldInfos.FieldInfo(i).omitTf)
                        anyProx = true;

                System.String fieldsSegment;

                if (si.GetDocStoreOffset() != -1)
                    fieldsSegment = si.GetDocStoreSegment();
                else
                    fieldsSegment = segment;

                if (doOpenStores)
                {
                    fieldsReader = new FieldsReader(storeDir, fieldsSegment, fieldInfos, readBufferSize, si.GetDocStoreOffset(), si.docCount);

                    // Verify two sources of "maxDoc" agree:
                    if (si.GetDocStoreOffset() == -1 && fieldsReader.Size() != si.docCount)
                    {
                        throw new CorruptIndexException("doc counts differ for segment " + si.name + ": fieldsReader shows " + fieldsReader.Size() + " but segmentInfo shows " + si.docCount);
                    }
                }

                tis = new TermInfosReader(cfsDir, segment, fieldInfos, readBufferSize);

                LoadDeletedDocs();

                // make sure that all index files have been read or are kept open
                // so that if an index update removes them we'll still have them
                freqStream = cfsDir.OpenInput(segment + ".frq", readBufferSize);
                if (anyProx)
                    proxStream = cfsDir.OpenInput(segment + ".prx", readBufferSize);
                OpenNorms(cfsDir, readBufferSize);

                if (doOpenStores && fieldInfos.HasVectors())
                {
                    // open term vector files only as needed
                    System.String vectorsSegment;
                    if (si.GetDocStoreOffset() != -1)
                        vectorsSegment = si.GetDocStoreSegment();
                    else
                        vectorsSegment = segment;
                    termVectorsReaderOrig = new TermVectorsReader(storeDir, vectorsSegment, fieldInfos, readBufferSize, si.GetDocStoreOffset(), si.docCount);
                }
                success = true;
            }
            finally
            {

                // With lock-less commits, it's entirely possible (and
                // fine) to hit a FileNotFound exception above.  In
                // this case, we want to explicitly close any subset
                // of things that were opened so that we don't have to
                // wait for a GC to do so.
                if (!success)
                {
                    DoClose();
                }
            }
        }
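
The subtlest part of Initialize is deciding which Directory each store file is read from. As a compact restatement of that decision, here is a hypothetical helper (names are illustrative; the real method inlines this logic for the doOpenStores case):

        // Shared doc stores (docStoreOffset != -1) may live in their own
        // compound file; otherwise stored fields sit beside the postings,
        // inside the segment's CFS when one is in use.
        private static Directory ResolveStoreDir(SegmentInfo si, Directory main, Directory cfsDir, Directory storeCfs)
        {
            if (si.GetDocStoreOffset() != -1)
            {
                return si.GetDocStoreIsCompoundFile() ? storeCfs : main;
            }
            return cfsDir;
        }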