Dispose() public method

Commits all changes to an index and closes all associated files. Note that this may be a costly operation, so try to re-use a single writer instead of closing and opening a new one. See Commit() for caveats about write caching done by some IO devices.
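
As an illustration of the re-use advice, here is a minimal sketch of a long-lived shared writer (assuming Lucene.Net 3.x; the index path and field names are hypothetical):

    // Keep one writer open for the application's lifetime and share it across
    // operations, instead of opening and disposing a writer per operation.
    private static readonly IndexWriter SharedWriter =
        new IndexWriter(FSDirectory.Open(new DirectoryInfo(@"C:\index")),
                        new StandardAnalyzer(Version.LUCENE_30),
                        IndexWriter.MaxFieldLength.UNLIMITED);

    public static void AddEntry(string id, string text)
    {
        var doc = new Document();
        doc.Add(new Field("id", id, Field.Store.YES, Field.Index.NOT_ANALYZED));
        doc.Add(new Field("text", text, Field.Store.NO, Field.Index.ANALYZED));
        SharedWriter.AddDocument(doc);
        SharedWriter.Commit(); // make the change durable without disposing the writer
    }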

If an exception is hit during close, e.g. due to disk full or some other reason, then both the on-disk index and the internal state of the IndexWriter instance will be consistent. However, the close will not be complete even though part of it (flushing buffered documents) may have succeeded, so the write lock will still be held.

If you can correct the underlying cause (e.g. free up some disk space) then you can call Dispose() again. Failing that, if you want to force the write lock to be released (dangerous, because you may then lose buffered docs in the IndexWriter instance) then you can do something like this:

    try
    {
        writer.Dispose();
    }
    finally
    {
        if (IndexWriter.IsLocked(directory))
        {
            IndexWriter.Unlock(directory);
        }
    }

after which you must be certain not to use the writer instance anymore.

NOTE: if this method hits an OutOfMemoryException you should immediately dispose the writer again. See above for details.
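
For example, a minimal sketch of guarding an indexing call along these lines (writer and doc are assumed to exist):

    try
    {
        writer.AddDocument(doc);
    }
    catch (OutOfMemoryException)
    {
        // per the note above, dispose the writer immediately;
        // buffered but uncommitted documents may be lost
        writer.Dispose();
        throw;
    }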

public void Dispose()
        public static void CreateIndexCreative(Creative entity, string IndexPath)
        {
            var document = new Document();
            document.Add(new Field("CreativeId", entity.Creativeid.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
            document.Add(new Field("Title", entity.Title, Field.Store.YES, Field.Index.ANALYZED));
            if (!string.IsNullOrEmpty(entity.About))
            {
                document.Add(new Field("About", entity.About, Field.Store.YES, Field.Index.ANALYZED));
            }

            Directory directory = FSDirectory.Open(new DirectoryInfo(IndexPath));
            Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_30);

            // Note: create=true rebuilds the index on every call; pass false to append to an existing index.
            var writer = new IndexWriter(directory, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);

            try
            {
                writer.AddDocument(document);
                writer.Optimize();
            }
            finally
            {
                // Dispose once, in finally, so the write lock is released even if indexing fails.
                writer.Dispose();
            }
        }
        public virtual void TestByteSizeLimit()
        {
            // tests that the max merge size constraint is applied during forceMerge.
            Directory dir = new RAMDirectory();

            // Prepare an index w/ several small segments and a large one.
            IndexWriterConfig conf = NewWriterConfig();
            IndexWriter writer = new IndexWriter(dir, conf);
            const int numSegments = 15;
            for (int i = 0; i < numSegments; i++)
            {
                int numDocs = i == 7 ? 30 : 1;
                AddDocs(writer, numDocs);
            }
            writer.Dispose();

            SegmentInfos sis = new SegmentInfos();
            sis.Read(dir);
            double min = sis.Info(0).SizeInBytes();

            conf = NewWriterConfig();
            LogByteSizeMergePolicy lmp = new LogByteSizeMergePolicy();
            lmp.MaxMergeMBForForcedMerge = (min + 1) / (1 << 20);
            conf.SetMergePolicy(lmp);

            writer = new IndexWriter(dir, conf);
            writer.ForceMerge(1);
            writer.Dispose();

            // Should only be 3 segments in the index, because one of them exceeds the size limit
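            // (the small segments on either side of the oversized one merge down, leaving it untouched)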
            sis = new SegmentInfos();
            sis.Read(dir);
            Assert.AreEqual(3, sis.Size());
        }
Example #3
        public void CreateIndex(Analyzer analyzer)
        {
            FSDirectory fsDir = new SimpleFSDirectory(new DirectoryInfo(_indexerFolder));
            IndexWriter indexWriter = new IndexWriter(fsDir, analyzer, true, Lucene.Net.Index.IndexWriter.MaxFieldLength.UNLIMITED);

            string[] files = System.IO.Directory.GetFiles(_textFilesFolder, Config.FileSearchPattern, SearchOption.AllDirectories);
            foreach (string file in files)
            {
                string name = new FileInfo(file).Name;
                string content = File.ReadAllText(file);

                Document doc = new Document();
                doc.Add(new Field(Config.Field_Path, file, Field.Store.YES, Field.Index.NOT_ANALYZED));
                doc.Add(new Field(Config.Field_Name, name, Field.Store.YES, Field.Index.ANALYZED));
                doc.Add(new Field(Config.Field_Content, content, Field.Store.NO, Field.Index.ANALYZED));

                indexWriter.AddDocument(doc);

                Console.WriteLine("{0} - {1}", file, name);
            }

            indexWriter.Optimize();
            indexWriter.Dispose();

            Console.WriteLine("File count: {0}", files.Length);
        }
        // add/update/clear search index data
        public static void AddUpdateLuceneIndex()
        {
            using (var context = new CreativeNetworkEntities())
            {

                // init lucene
                var analyzer = new StandardAnalyzer(Version.LUCENE_30);
                using (var writer = new IndexWriter(_directory, analyzer, IndexWriter.MaxFieldLength.UNLIMITED))
                {
                    // add data to lucene search index (replaces older entries if any)
                    foreach (var creative in context.Creative)
                    {
                        _addToLuceneIndex(creative, writer);
                        foreach (var chapter in creative.Chapter)
                        {
                            _addToLuceneIndex(chapter, writer);
                        }
                    }

                    // close the analyzer; the using block disposes the writer,
                    // so an explicit writer.Dispose() here would be redundant
                    analyzer.Close();
                }
            }
        }
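
The replace-on-update behaviour lives in _addToLuceneIndex, which is not shown on this page (an overload presumably handles Chapter). A plausible sketch of such a helper, with field names borrowed from the first example above as an assumption:

        private static void _addToLuceneIndex(Creative creative, IndexWriter writer)
        {
            // remove any stale entry with the same key before adding the fresh one
            writer.DeleteDocuments(new TermQuery(new Term("CreativeId", creative.Creativeid.ToString())));

            var doc = new Document();
            doc.Add(new Field("CreativeId", creative.Creativeid.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
            doc.Add(new Field("Title", creative.Title, Field.Store.YES, Field.Index.ANALYZED));
            writer.AddDocument(doc);
        }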
        /// <summary>
        /// Set up a new index in RAM with three test phrases and the supplied Analyzer.
        /// </summary>
        /// <exception cref="Exception"> if an error occurs with index writer or searcher </exception>
        public override void SetUp()
        {
            base.SetUp();
            analyzer = new ShingleAnalyzerWrapper(new MockAnalyzer(Random(), MockTokenizer.WHITESPACE, false), 2);
            directory = NewDirectory();
            IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(TEST_VERSION_CURRENT, analyzer));

            Document doc;
            doc = new Document();
            doc.Add(new TextField("content", "please divide this sentence into shingles", Field.Store.YES));
            writer.AddDocument(doc);

            doc = new Document();
            doc.Add(new TextField("content", "just another test sentence", Field.Store.YES));
            writer.AddDocument(doc);

            doc = new Document();
            doc.Add(new TextField("content", "a sentence which contains no test", Field.Store.YES));
            writer.AddDocument(doc);

            writer.Dispose();

            reader = DirectoryReader.Open(directory);
            searcher = NewSearcher(reader);
        }
        public void TestGetFilterHandleNumericParseError()
        {
            NumericRangeFilterBuilder filterBuilder = new NumericRangeFilterBuilder();
            filterBuilder.SetStrictMode(false);

            String xml = "<NumericRangeFilter fieldName='AGE' type='int' lowerTerm='-1' upperTerm='NaN'/>";
            XmlDocument doc = GetDocumentFromString(xml);
            Filter filter = filterBuilder.GetFilter(doc.DocumentElement);
            Store.Directory ramDir = NewDirectory();
            IndexWriter writer = new IndexWriter(ramDir, NewIndexWriterConfig(TEST_VERSION_CURRENT, null));
            writer.Commit();
            try
            {
                AtomicReader reader = SlowCompositeReaderWrapper.Wrap(DirectoryReader.Open(ramDir));
                try
                {
                    assertNull(filter.GetDocIdSet(reader.AtomicContext, reader.LiveDocs));
                }
                finally
                {
                    reader.Dispose();
                }
            }
            finally
            {
                writer.Commit();
                writer.Dispose();
                ramDir.Dispose();
            }
        }
        public virtual void TestLucene()
        {
            int num = 100;

            Directory indexA = NewDirectory();
            Directory indexB = NewDirectory();

            FillIndex(Random(), indexA, 0, num);
            bool fail = VerifyIndex(indexA, 0);
            if (fail)
            {
                Assert.Fail("Index a is invalid");
            }

            FillIndex(Random(), indexB, num, num);
            fail = VerifyIndex(indexB, num);
            if (fail)
            {
                Assert.Fail("Index b is invalid");
            }

            Directory merged = NewDirectory();

            IndexWriter writer = new IndexWriter(merged, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NewLogMergePolicy(2)));
            writer.AddIndexes(indexA, indexB);
            writer.ForceMerge(1);
            writer.Dispose();

            fail = VerifyIndex(merged, 0);

            Assert.IsFalse(fail, "The merged index is invalid");
            indexA.Dispose();
            indexB.Dispose();
            merged.Dispose();
        }
        public virtual void TestPrevTermAtEnd()
        {
            IndexWriter writer = new IndexWriter(Dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetCodec(TestUtil.AlwaysPostingsFormat(new Lucene41PostingsFormat())));
            AddDoc(writer, "aaa bbb");
            writer.Dispose();
            SegmentReader reader = GetOnlySegmentReader(DirectoryReader.Open(Dir));
            TermsEnum terms = reader.Fields.Terms("content").Iterator(null);
            Assert.IsNotNull(terms.Next());
            Assert.AreEqual("aaa", terms.Term().Utf8ToString());
            Assert.IsNotNull(terms.Next());
            long ordB;
            try
            {
                ordB = terms.Ord();
            }
            catch (System.NotSupportedException)
            {
                // ok -- codec is not required to support ord
                reader.Dispose();
                return;
            }
            Assert.AreEqual("bbb", terms.Term().Utf8ToString());
            Assert.IsNull(terms.Next());

            terms.SeekExact(ordB);
            Assert.AreEqual("bbb", terms.Term().Utf8ToString());
            reader.Dispose();
        }
        static void Main(string[] args)
        {
            // Path to index file.
            Directory indexDirectory = FSDirectory.Open(@"/* PASTE THE PATH WHERE YOUR INDEX WILL BE SAVED */");

            // Creating Analyzer to make index searchable.
            Analyzer analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);

            // Creating IndexWriter
            IndexWriter.MaxFieldLength mfl = new IndexWriter.MaxFieldLength(100000);
            IndexWriter writer = new IndexWriter(indexDirectory, analyzer, mfl);

            // Full path to input .txt files.
            string[] filesList = System.IO.Directory.GetFiles(@"/* PASTE THE PATH TO YOUR INPUT FILE(S) */", "*.txt");

            /* INDEX FIELDS:
            ** id & body are the fields of my Lucene index;
            ** you can change these fields according to your
            ** needs
            */
            int idNumber = 0;
            string body;

            foreach (string file in filesList)
            {
                body = System.IO.File.ReadAllText(file);
                AddToIndex(idNumber, body, writer);
                idNumber++;
            }
            writer.Dispose();
        }
Example #10
        private void GenerateLuceneIndex(List<Entry> entries)
        {
            // Never hard-code a real storage account key; load the connection string from configuration.
            Microsoft.WindowsAzure.Storage.CloudStorageAccount storageAccount = Microsoft.WindowsAzure.Storage.CloudStorageAccount.Parse("/* PASTE YOUR AZURE STORAGE CONNECTION STRING */");
            AzureDirectory azureDirectory = new AzureDirectory(storageAccount, "diningsearchindex");
            Analyzer analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);
            IndexWriter indexWriter = new IndexWriter(azureDirectory, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED);

            int c = 0;
            foreach (var entry in entries)
            {
                c++;
                var item = new Document();
                item.Add(new Field("Id", c.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
                item.Add(new Field("Dish", entry.DishName, Field.Store.YES, Field.Index.ANALYZED));
                item.Add(new Field("Cafe/Restaurant", string.Format("{0}/{1}", entry.CafeName, entry.RestaurantName), Field.Store.YES, Field.Index.ANALYZED));
                item.Add(new Field("URL", entry.CafeUrl, Field.Store.YES, Field.Index.NOT_ANALYZED));
                item.Add(new Field("Description", entry.Description ?? string.Empty, Field.Store.YES, Field.Index.ANALYZED));
                item.Add(new Field("Price", entry.Price, Field.Store.YES, Field.Index.NOT_ANALYZED));
                indexWriter.AddDocument(item);
            }

            indexWriter.Dispose();
            azureDirectory.Dispose();
        }
        public virtual void TestExceptionDuringSave()
        {
            MockDirectoryWrapper dir = NewMockDirectory();
            dir.FailOn(new FailureAnonymousInnerClassHelper(this, dir));
            IndexWriter writer = new IndexWriter(dir, GetConfig(Random(), new PersistentSnapshotDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy(), dir, OpenMode_e.CREATE_OR_APPEND)));
            writer.AddDocument(new Document());
            writer.Commit();

            PersistentSnapshotDeletionPolicy psdp = (PersistentSnapshotDeletionPolicy)writer.Config.DelPolicy;
            try
            {
                psdp.Snapshot();
            }
            catch (IOException ioe)
            {
                if (ioe.Message.Equals("now fail on purpose"))
                {
                    // ok
                }
                else
                {
                    throw; // rethrow without resetting the stack trace
                }
            }
            Assert.AreEqual(0, psdp.SnapshotCount);
            writer.Dispose();
            Assert.AreEqual(1, DirectoryReader.ListCommits(dir).Count);
            dir.Dispose();
        }
        public virtual void TestAllSegmentsSmall()
        {
            Directory dir = new RAMDirectory();

            IndexWriterConfig conf = NewWriterConfig();
            IndexWriter writer = new IndexWriter(dir, conf);

            AddDocs(writer, 3);
            AddDocs(writer, 3);
            AddDocs(writer, 3);
            AddDocs(writer, 3);

            writer.Dispose();

            conf = NewWriterConfig();
            LogMergePolicy lmp = new LogDocMergePolicy();
            lmp.MaxMergeDocs = 3;
            conf.SetMergePolicy(lmp);

            writer = new IndexWriter(dir, conf);
            writer.ForceMerge(1);
            writer.Dispose();

            SegmentInfos sis = new SegmentInfos();
            sis.Read(dir);
            Assert.AreEqual(1, sis.Size());
        }
        public override void SetUp()
        {
            base.SetUp();
            store = NewDirectory();
            IndexWriter writer = new IndexWriter(store, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random(), MockTokenizer.WHITESPACE, false)));

            Document doc;

            doc = new Document();
            doc.Add(NewTextField("aaa", "foo", Field.Store.YES));
            writer.AddDocument(doc);

            doc = new Document();
            doc.Add(NewTextField("aaa", "foo", Field.Store.YES));
            writer.AddDocument(doc);

            doc = new Document();
            doc.Add(NewTextField("contents", "Tom", Field.Store.YES));
            writer.AddDocument(doc);

            doc = new Document();
            doc.Add(NewTextField("contents", "Jerry", Field.Store.YES));
            writer.AddDocument(doc);

            doc = new Document();
            doc.Add(NewTextField("zzz", "bar", Field.Store.YES));
            writer.AddDocument(doc);

            writer.ForceMerge(1);
            writer.Dispose();
        }
Example #14
        public static IndexWriter CreateIndex(Content[] contents)
        {
            var v = Lucene.Net.Util.Version.LUCENE_30;
            var l = Lucene.Net.Index.IndexWriter.MaxFieldLength.UNLIMITED;
            var d = FSDirectory.Open(new DirectoryInfo(IndexPath));

            IndexWriter writer = new IndexWriter(d, new StandardAnalyzer(v), l);

            try
            {
                foreach (var item in contents)
                {
                    Document doc = new Document();

                    Field id = new Field("id", item.Id.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED);
                    Field title = new Field("title", item.Title, Field.Store.YES, Field.Index.ANALYZED);
                    Field username = new Field("username", item.User.UserName, Field.Store.YES, Field.Index.ANALYZED);
                    doc.Add(id);
                    doc.Add(title);
                    doc.Add(username);
                    writer.AddDocument(doc);
                }
                writer.Optimize();
            }
            finally
            {
                // Dispose in finally so the write lock is released even if indexing throws.
                writer.Dispose();
            }

            // Note: the returned writer has already been disposed and must not be used for further writes.
            return writer;
        }
        public virtual void TestTermEnum()
        {
            IndexWriter writer = null;

            writer = new IndexWriter(Dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));

            // Add 100 documents with term: aaa
            // and 100 documents with terms: aaa bbb
            // Therefore, term 'aaa' has a document frequency of 200 and term 'bbb' of 100
            for (int i = 0; i < 100; i++)
            {
                AddDoc(writer, "aaa");
                AddDoc(writer, "aaa bbb");
            }

            writer.Dispose();

            // verify document frequency of terms in a multi-segment index
            VerifyDocFreq();

            // merge segments
            writer = new IndexWriter(Dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetOpenMode(OpenMode_e.APPEND));
            writer.ForceMerge(1);
            writer.Dispose();

            // verify document frequency of terms in a single-segment index
            VerifyDocFreq();
        }
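
VerifyDocFreq is defined elsewhere in the fixture; a plausible sketch of the check it performs (assuming AddDoc writes to a field named "content"):

        private void VerifyDocFreq()
        {
            using (IndexReader reader = DirectoryReader.Open(Dir))
            {
                // 'aaa' occurs in all 200 documents, 'bbb' only in the 100 two-term ones
                Assert.AreEqual(200, reader.DocFreq(new Term("content", "aaa")));
                Assert.AreEqual(100, reader.DocFreq(new Term("content", "bbb")));
            }
        }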
 public virtual void TestForceMergeNotNeeded()
 {
     Directory dir = NewDirectory();
     AtomicBoolean mayMerge = new AtomicBoolean(true);
     MergeScheduler mergeScheduler = new SerialMergeSchedulerAnonymousInnerClassHelper(this, mayMerge);
     IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergeScheduler(mergeScheduler).SetMergePolicy(MergePolicy()));
     writer.Config.MergePolicy.NoCFSRatio = Random().NextBoolean() ? 0 : 1;
     int numSegments = TestUtil.NextInt(Random(), 2, 20);
     for (int i = 0; i < numSegments; ++i)
     {
         int numDocs = TestUtil.NextInt(Random(), 1, 5);
         for (int j = 0; j < numDocs; ++j)
         {
             writer.AddDocument(new Document());
         }
         writer.Reader.Dispose();
     }
     for (int i = 5; i >= 0; --i)
     {
         int segmentCount = writer.SegmentCount;
         int maxNumSegments = i == 0 ? 1 : TestUtil.NextInt(Random(), 1, 10);
         mayMerge.Set(segmentCount > maxNumSegments);
         writer.ForceMerge(maxNumSegments);
     }
     writer.Dispose();
     dir.Dispose();
 }
Example #17
        void Index()
        {
            var conf = new IndexWriterConfig(LuceneVersion.LUCENE_CURRENT, new Lucene.Net.Analysis.Core.WhitespaceAnalyzer(LuceneVersion.LUCENE_CURRENT));

            Lucene.Net.Index.IndexWriter wr = new Lucene.Net.Index.IndexWriter(dir, conf);

            Lucene.Net.Documents.Document doc = null;
            Lucene.Net.Documents.Field    f   = null;

            doc = new Lucene.Net.Documents.Document();
            f   = new Lucene.Net.Documents.Field("field", "a b c d", Lucene.Net.Documents.Field.Store.NO, Lucene.Net.Documents.Field.Index.ANALYZED);
            doc.Add(f);
            wr.AddDocument(doc);

            doc = new Lucene.Net.Documents.Document();
            f   = new Lucene.Net.Documents.Field("field", "a b a d", Lucene.Net.Documents.Field.Store.NO, Lucene.Net.Documents.Field.Index.ANALYZED);
            doc.Add(f);
            wr.AddDocument(doc);

            doc = new Lucene.Net.Documents.Document();
            f   = new Lucene.Net.Documents.Field("field", "a b e f", Lucene.Net.Documents.Field.Store.NO, Lucene.Net.Documents.Field.Index.ANALYZED);
            doc.Add(f);
            wr.AddDocument(doc);

            doc = new Lucene.Net.Documents.Document();
            f   = new Lucene.Net.Documents.Field("field", "x y z", Lucene.Net.Documents.Field.Store.NO, Lucene.Net.Documents.Field.Index.ANALYZED);
            doc.Add(f);
            wr.AddDocument(doc);

            wr.Dispose();
        }
        public override void SetUp()
        {
            base.SetUp();
            Document doc;
            Rd1 = NewDirectory();
            IndexWriter iw1 = new IndexWriter(Rd1, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));

            doc = new Document();
            doc.Add(NewTextField("field1", "the quick brown fox jumps", Field.Store.YES));
            doc.Add(NewTextField("field2", "the quick brown fox jumps", Field.Store.YES));
            iw1.AddDocument(doc);

            iw1.Dispose();
            Rd2 = NewDirectory();
            IndexWriter iw2 = new IndexWriter(Rd2, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));

            doc = new Document();
            doc.Add(NewTextField("field1", "the fox jumps over the lazy dog", Field.Store.YES));
            doc.Add(NewTextField("field3", "the fox jumps over the lazy dog", Field.Store.YES));
            iw2.AddDocument(doc);

            iw2.Dispose();

            this.Ir1 = SlowCompositeReaderWrapper.Wrap(DirectoryReader.Open(Rd1));
            this.Ir2 = SlowCompositeReaderWrapper.Wrap(DirectoryReader.Open(Rd2));
        }
        public void ClearIndex()
        {
            if (System.IO.Directory.GetFiles(this.index.Directory.FullName).Any())
            {
                var analyzer = new Lucene.Net.Analysis.Standard.StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);
                using (var writer = new IndexWriter(this.index, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED))
                {
                    // remove older index entries
                    writer.DeleteAll();

                    // close the analyzer; the using block disposes the writer
                    analyzer.Close();
                }

                ForceUnlockIndex();
            }
        }
        public void CreateIndex()
        {

            Analyzer analyzer = new MockAnalyzer(Random());
            IndexWriter writer = new IndexWriter
              (dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
            try
            {
                for (int docid = 0; docid < NUM_DOCS; docid++)
                {
                    Document d = new Document();
                    d.Add(NewStringField("docid", "" + docid, Field.Store.YES));
                    d.Add(NewStringField("never_load", "fail", Field.Store.YES));
                    foreach (string f in FIELDS)
                    {
                        for (int val = 0; val < NUM_VALUES; val++)
                        {
                            d.Add(NewStringField(f, docid + "_" + f + "_" + val, Field.Store.YES));
                        }
                    }
                    d.Add(NewStringField("load_later", "yes", Field.Store.YES));
                    writer.AddDocument(d);
                }
            }
            finally
            {
                writer.Dispose();
            }
        }
        public virtual void TestFixedBinary()
        {
            BaseDirectoryWrapper dir = NewFSDirectory(CreateTempDir("2BFixedBinary"));
            if (dir is MockDirectoryWrapper)
            {
                ((MockDirectoryWrapper)dir).Throttling = MockDirectoryWrapper.Throttling_e.NEVER;
            }

            IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))
               .SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH).SetRAMBufferSizeMB(256.0).SetMergeScheduler(new ConcurrentMergeScheduler()).SetMergePolicy(NewLogMergePolicy(false, 10)).SetOpenMode(IndexWriterConfig.OpenMode_e.CREATE));

            Document doc = new Document();
            var bytes = new byte[4];
            BytesRef data = new BytesRef(bytes);
            BinaryDocValuesField dvField = new BinaryDocValuesField("dv", data);
            doc.Add(dvField);

            for (int i = 0; i < int.MaxValue; i++)
            {
                bytes[0] = (byte)(i >> 24);
                bytes[1] = (byte)(i >> 16);
                bytes[2] = (byte)(i >> 8);
                bytes[3] = (byte)i;
                w.AddDocument(doc);
                if (i % 100000 == 0)
                {
                    Console.WriteLine("indexed: " + i);
                    Console.Out.Flush();
                }
            }

            w.ForceMerge(1);
            w.Dispose();

            Console.WriteLine("verifying...");
            Console.Out.Flush();

            DirectoryReader r = DirectoryReader.Open(dir);
            int expectedValue = 0;
            foreach (AtomicReaderContext context in r.Leaves)
            {
                AtomicReader reader = context.AtomicReader;
                BytesRef scratch = new BytesRef();
                BinaryDocValues dv = reader.GetBinaryDocValues("dv");
                for (int i = 0; i < reader.MaxDoc; i++)
                {
                    bytes[0] = (byte)(expectedValue >> 24);
                    bytes[1] = (byte)(expectedValue >> 16);
                    bytes[2] = (byte)(expectedValue >> 8);
                    bytes[3] = (byte)expectedValue;
                    dv.Get(i, scratch);
                    Assert.AreEqual(data, scratch);
                    expectedValue++;
                }
            }

            r.Dispose();
            dir.Dispose();
        }
Example #22
        public static void Arquive(string text, string url, bool isEnglish)
        {
            // Initialise the path and analyzer according to whether the text is English or Portuguese
            string directory;
            Lucene.Net.Analysis.Analyzer analyzer;
            Object _mon = isEnglish ? _monEn : _monPt;
            InitPathAndAnalyzer(out directory, out analyzer, isEnglish);

            // Create a new document with the required information: one field for the URL,
            // one for the content to analyse, and one for a hash of the content
            string hash = text.GetHashCode().ToString();
            Document document = new Document();
            document.Add(new Field("url", url, Field.Store.YES, Field.Index.NOT_ANALYZED));
            document.Add(new Field("text", text, Field.Store.NO, Field.Index.ANALYZED));
            document.Add(new Field("hash", hash, Field.Store.YES, Field.Index.NOT_ANALYZED));

            Monitor.Enter(_mon);
            Lucene.Net.Store.Directory dir = Lucene.Net.Store.FSDirectory.Open(new DirectoryInfo(directory));
            if (System.IO.Directory.GetFiles(directory).Length == 0)
            {
                IndexWriter init = new IndexWriter(dir, analyzer, true, new IndexWriter.MaxFieldLength(25000));
                init.Dispose();
            }

            // Check whether the document already exists and holds the same content

            IndexSearcher isearcher = new IndexSearcher(dir, false);
            // The search field is the URL
            QueryParser parser = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, "url", analyzer);
            Query query = parser.Parse(url);

            ScoreDoc[] hits = isearcher.Search(query, null, 1000).ScoreDocs;
            if (hits.Length > 0)
            {
                Document doc = isearcher.Doc(hits[0].Doc);

                // Same content: nothing to do
                if (doc.Get("hash").Equals(hash))
                {
                    isearcher.Dispose();
                    Monitor.Exit(_mon);
                    return;
                }
                // Different content: drop the stale document
                else
                {
                    isearcher.IndexReader.DeleteDocument(hits[0].Doc);
                }
            }
            isearcher.Dispose();
            // Open the index for the correct language and add the new document
            IndexWriter iwriter = new IndexWriter(dir, analyzer, false, new IndexWriter.MaxFieldLength(25000));
            iwriter.AddDocument(document);
            iwriter.Optimize();
            iwriter.Dispose();
            Monitor.Exit(_mon);
        }
        static Analyzer analyzer = new MMSegAnalyzer(); //MMSegAnalyzer //StandardAnalyzer
        static void Main(string[] args)
        {

            string[] texts = new string[] { 
                "京华时报1月23日报道 昨天,受一股来自中西伯利亚的强冷空气影响,本市出现大风降温天气,白天最高气温只有零下7摄氏度,同时伴有6到7级的偏北风。",
                "【AppsFlyer:社交平台口碑营销效果最佳http://t.cn/zTHEQRM】社交网络分享应用的方式,在新应用获取用户非常有效率。搜索方式可为移动应用带来最高玩家质量,但玩家量和转化率较低。广告可带来最大用户量,但用户质量却相对不高,转化率也不够高。刺激性流量的转化率最高,但是平均玩家质量是最低",
                "Server Core省去了Windows Server的图形界面,改为命令行的方式来管理服务器。它不仅拥有更精简的体积与更优化的性能,还可缩短50%-60%的系统更新时间。现在,SQL Server已经支持Windows Server Core,计划内停机时间的大幅缩减让企业关键数据库应用获得更高的可用性。",
                "【各移动游戏分发渠道的优势和分成比例】联通沃商店:线下资源和话费支付能力(分成比例3:7),触控:技术和运营能力(分成比例5:5),91无线:评测、运营、数据等服务能力(分成比例4:6),UC:用户接入点、WebApp的支持(分成比例5:5),斯凯网络:硬件厂商资源(分成比例3:7)http://t.cn/zTHnwJk",
                "iPod之父创办的Nest收购家居能源数据监测服务MyEnergy,将从小小恒温器进展为家居节能整套方案 |Nest公司要做的并不只是一个小小温控器,而是提供智能家居节能整套方案。而MyEnergy积累的数据能对Nest起到很大帮助,并且也为Nest带来更多能源服务商方面的联系: http://t.cn/zTHs8qQ",
                "今日,58同城将正式与支付宝达成战略合作。这既是支付宝首次为阿里系外的企业提供担保支付服务,也是58同城推动消费者保障服务在支付和结算方面迈出的重要一步,此番合作将对整个行业产生颠覆性的影响。58要做的就是不断的了解用户痛点,不断的与虚假信息斗争,建立一个人人信赖的生活服务平台。",
                "【iPhone如何征服日本】虽然日本身为现代移动技术的摇篮,智能手机和触屏设备的普及也领先于其他地区,但iPhone仍然顺利地征服这个岛国,成为该国最畅销的手机。一方面得益于女性用户的追捧,但更多地,还是源自日本移动行业的内在问题。http://t.cn/zTHENrI",
                "【东方体育中心游泳馆今起重新开放,成人票20元/场】#爱体育#“立夏”过了,夏天近了,喜欢游泳的筒子心痒难耐了吧!@965365上海体育发布 说,经过一个多月的内装修,东方体育中心室内游泳馆今天起重新对外开放,开放时间为13:00-21:00,票价详情点大图了解~今夏挥洒汗水,“玉兰桥”走起!",
                "【深圳地铁免费伞 一年借出2000把归还70把】深圳地铁站摆放了“红雨伞”,下雨时可免费借给乘客使用。但一年来,地铁借给市民2000多把雨伞,只还回来70把,有的站甚至已经没有雨伞了。工作人员尝试联系部分借伞人,发现登记电话号码常常显示是空号……地铁站的红雨伞,你借了会还吗?(南方都市报)",
                "【银行的速度,移动互联网的速度】招商银行信用卡副总经理彭千在GMIC上分享招商银行移动互联网尝试案例:先后和开心和人人推出联名信用卡,但银行动作太慢了,推出是开心网已经不开心了,人人网已经没有人了!",
                "【黑石超级公关】4月21日的新闻联播上,黑石集团主席施瓦茨曼向清华大学捐赠1亿美元,并牵头筹集2亿美元,投资3亿美元与清华大学合作筹建“苏世民书院”的新闻被列为头条。很明显“未来中国不再是选修课,而是必修课。”1亿美元投资清华,背后是扭转坑中投形象的战略公关…",
                "【传谷歌将效仿苹果开设谷歌眼镜零售店】科技博客Business Insider今天援引消息人士说法称,谷歌正计划开设零售店,销售谷歌眼镜。谷歌门店或将专为眼镜产品服务,即只展示各类品牌、型号的“谷歌眼镜”产品。早前的消息指出,谷歌拟效仿苹果和微软,计划推出自主品牌的零售门店,以展示旗下各类产品。",
                "【武汉一高中禁止学生校内用手机 现场砸毁】近期在武昌东亭二路,一所学校收缴并砸毁学生手机24部,其中包括iPhone5等较昂贵的智能手机,也有价值数百元的普通手机,并设“手机尸体展示台”展出近期砸毁的部分手机残骸,均已经无法使用。",
                "【小偷慌不择路当街撒钱 警民携手完璧归赵】日前,一男子来到青浦一小作坊佯装购买商品,后借机溜进卧室行窃。老板娘在周围群众的帮助下将男子扭获,男子见势不妙,掏出一沓钞票当街抛撒。民警到达现场后,将男子抛撒的钱一一清点,共计6600元。警察蜀黍真心想为当天帮忙捡钱的群众竖起大拇指!",
                "#瓜大活动预告#【风起云涌南山下,群雄逐鹿辩工大】经过层层奋战,软件与微电子学院和理学院最终杀入了决赛,巅峰对决,即将展开。智慧的火花,头脑的竞技,唇舌的交战,精彩,一触即发。5月13日,周一晚七点,翱翔学生中心,我们与你不见不散!via人人网@西北工业大学_学生会",
                "#GMIC#尚伦律师事务所合伙人张明若律师:在中国,发生了很多起创业者因为法律意识淡薄,在天使投融资期间甚至没有签订法律文件的创业悲剧。这份文件的目的是帮助暂时还请不起律师的创业者。这份法律文件模板简单、对买卖双方公平、且免费!",
                "【金蝶随手记创始人谷风:先自我否定,再创新!】当创业者们把目光聚焦在娱乐、社交、手游、电商这些热门品类时,相信没有多少人会料到记账这一细分领域里也有产品能做到6000万级别的用户规模,堪称“屌丝逆袭”。http://t.cn/zTQvB16",
                "【陕西回应省纪委人员开车打架致死案:车辆是私家车 车主是纪委临时工】乾县青仁村发生斗殴,一死两伤,嫌犯开的显示单位为陕西省纪委的轿车引起质疑。陕西公安厅称,陕VHA088克莱斯勒轿车系嫌犯付某借用朋友的私家车。乾县公安局此前通报,陕VHA088车主是陕西省纪委临时工范小勇http://t.cn/zTQP5kC",
                "【经典干货!25个强大的PS炫光特效教程】这些经典的特效教程是很多教PS老师们的课堂案例,更被很多出版物摘录其中。那么今天毫无保留的全盘托出,同学们一定要好好练习。完成的同学也可以到优设群交作业哟,给大家分享你的设计过程和经验心得:)@尼拉贡戈小麦穗 →http://t.cn/zTHdOCK",
                "【树莓派的三个另类“武装”玩法】树莓派(Raspberry Pi)一直以来以极低的价格和“信用卡大小”的尺寸为人追捧。对于爱折腾的发烧友来说,永远可以在常人意想不到的地方发挥出自己折腾的功力。当一般人仅仅研究其编程玩法时,另一帮人已经琢磨着要把树莓派“武装”成另一个样子。http://t.cn/zTHFxIS",
                "【媒体札记:白宫信访办】@徐达内:19年前铊中毒的清华女生朱令的同情者,找到了“白宫请愿”这个易于操作又声势浩大的方法,要求美国将朱当年的室友孙维驱逐出境。随着意见领袖和各大媒体的加入,这一“跨国抗议” 的景观搅动了对官方公信力,冤假错案判断标准的全民讨论。http://t.cn/zTHsLIC",
                "【传第七大道页游海外月流水近1亿元http://t.cn/zTQPnnv】根据消息人士的透露,第七大道目前旗下网页游戏海外月流水收入已达近1亿元人民币,实质已是国内游戏公司海外收入第一,已超过大家所熟知的端游上市公司。孟治昀如是表示:“谁能告诉我,中国网游企业出口收入哪家公司高于第七大道?”",
                "【简介:他废掉了一切不服者】弗格森执教曼联26年,夺得13个英超冠军,4个联赛杯冠军,5个足总杯冠军,2个欧冠冠军,1个世俱杯冠军,1个优胜者杯冠军,1个欧洲超级杯。如果非要用一句话来总结他的伟大,小编个人的总结是:他废掉了一切敢于“不服者”,包括小贝同学",
                "这个世界干啥最赚钱?历史证明,持续保持对一个国家进行专制统治,通过无节制的赋税和滥发货币来掠夺全体国民的私人财富是唯一的标准答案。历史在进步,这种商业模式也在改头换面,于是,党专制替代家族专制,集体世袭权利代替个体世袭权力。既然改头换面,理论体系也得改变,这个理论体系就是特色论。",
                "【拥有“全球最美海滩”的塞舌尔将对中国游客免签!】#便民提示#准备出国白相的筒子冒个泡吧~你们有福啦。拥有“全球最美丽的海滩”和“最洁净的海水”美誉的塞舌尔,将可凭我国有效护照免签入境,最多停留30天这里还是英国威廉王子和王妃的蜜月地~~所以,别再只盯着马尔代夫一处啦",
                "【用数据告诉你手游有多热】今天,作为本届GMIC 的一部分,GGS全球移动游戏峰会召开。嘉宾和游戏开发者们探讨了移动游戏的现状与发展趋势。手游则是最为重要的一大关键词。盛大游戏总裁钱东海分享了日本最大手游公司CEO预测的数据:2015年全球游戏产业的格局中80%都是手机游戏。http://t.cn/zTHdkFY"
            };

            IndexWriter iw = new IndexWriter(FSDirectory.Open(INDEX_DIR), analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
            foreach (string text in texts)
            {
                Document doc = new Document();
                doc.Add(new Field("body", text, Field.Store.YES, Field.Index.ANALYZED));
                iw.AddDocument(doc);
                Console.WriteLine("Indexed doc: {0}", text);
            }
            iw.Commit();
            iw.Optimize();
            iw.Dispose();

            Console.WriteLine();

            Console.WriteLine("Building index done!\r\n\r\n");

            while (true)
            {
                Console.Write("Enter the keyword: ");
                string keyword = Console.ReadLine();
                Search(keyword);
                Console.WriteLine();
            }

            //Console.ReadLine();
        }
 public virtual void TestNewestSegment_Mem()
 {
     Directory directory = NewDirectory();
     IndexWriter writer = new IndexWriter(directory, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));
     Assert.IsNull(writer.NewestSegment());
     writer.Dispose();
     directory.Dispose();
 }
        public virtual void TestDoubleOffsetCounting()
        {
            Directory dir = NewDirectory();
            IndexWriter w = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));
            Document doc = new Document();
            FieldType customType = new FieldType(StringField.TYPE_NOT_STORED);
            customType.StoreTermVectors = true;
            customType.StoreTermVectorPositions = true;
            customType.StoreTermVectorOffsets = true;
            Field f = NewField("field", "abcd", customType);
            doc.Add(f);
            doc.Add(f);
            Field f2 = NewField("field", "", customType);
            doc.Add(f2);
            doc.Add(f);
            w.AddDocument(doc);
            w.Dispose();

            IndexReader r = DirectoryReader.Open(dir);
            Terms vector = r.GetTermVectors(0).Terms("field");
            Assert.IsNotNull(vector);
            TermsEnum termsEnum = vector.Iterator(null);
            Assert.IsNotNull(termsEnum.Next());
            Assert.AreEqual("", termsEnum.Term().Utf8ToString());

            // Token "" occurred once
            Assert.AreEqual(1, termsEnum.TotalTermFreq());

            DocsAndPositionsEnum dpEnum = termsEnum.DocsAndPositions(null, null);
            Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
            dpEnum.NextPosition();
            Assert.AreEqual(8, dpEnum.StartOffset());
            Assert.AreEqual(8, dpEnum.EndOffset());
            Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dpEnum.NextDoc());

            // Token "abcd" occurred three times
            Assert.AreEqual(new BytesRef("abcd"), termsEnum.Next());
            dpEnum = termsEnum.DocsAndPositions(null, dpEnum);
            Assert.AreEqual(3, termsEnum.TotalTermFreq());

            Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
            dpEnum.NextPosition();
            Assert.AreEqual(0, dpEnum.StartOffset());
            Assert.AreEqual(4, dpEnum.EndOffset());

            dpEnum.NextPosition();
            Assert.AreEqual(4, dpEnum.StartOffset());
            Assert.AreEqual(8, dpEnum.EndOffset());

            dpEnum.NextPosition();
            Assert.AreEqual(8, dpEnum.StartOffset());
            Assert.AreEqual(12, dpEnum.EndOffset());

            Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dpEnum.NextDoc());
            Assert.IsNull(termsEnum.Next());
            r.Dispose();
            dir.Dispose();
        }
 public void IndexText(Directory directory, string text)
 {
     Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_30);
     IndexWriter indexWriter = new IndexWriter(directory, analyzer, true, new IndexWriter.MaxFieldLength(25000));
     Document document = new Document();
     document.Add(new Field("fieldname", text, Field.Store.YES, Field.Index.ANALYZED));
     indexWriter.AddDocument(document);
     indexWriter.Dispose();
 }
 public override void CreateIndex()
 {
     using (IndexWriter indexWriter = new IndexWriter(this.index, new UnidadeFisicaSearcher.InstancePerFieldAnalyzerWrapper().instancePerFieldAnalyzerWrapper, IndexWriter.MaxFieldLength.UNLIMITED))
     {
         GISAUtils.getAllUnidadesFisicasIds().ToList().ForEach(id => indexWriter.AddDocument(this.GetDocument(id)));
         indexWriter.Optimize();
         // the using block disposes the writer
     }
 }
Example #28
 public override void CreateIndex()
 {
     using(IndexWriter indexWriter = new IndexWriter(this.index, new SynonymAnalyzer(new XmlSynonymEngine()), IndexWriter.MaxFieldLength.UNLIMITED))
     {
         GISAUtils.getAllTipologiasIds().ToList().ForEach(id => indexWriter.AddDocument(this.GetDocument(id)));
         indexWriter.Optimize();
         // the using block disposes the writer
     }
 }
Example #29
        public virtual void TestPayloadFieldBit()
        {
            Directory ram = NewDirectory();
            PayloadAnalyzer analyzer = new PayloadAnalyzer();
            IndexWriter writer = new IndexWriter(ram, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
            Document d = new Document();
            // this field won't have any payloads
            d.Add(NewTextField("f1", "this field has no payloads", Field.Store.NO));
            // this field will have payloads in all docs, however not for all term positions,
            // so this field is used to check if the DocumentWriter correctly enables the payloads bit
            // even if only some term positions have payloads
            d.Add(NewTextField("f2", "this field has payloads in all docs", Field.Store.NO));
            d.Add(NewTextField("f2", "this field has payloads in all docs NO PAYLOAD", Field.Store.NO));
            // this field is used to verify if the SegmentMerger enables payloads for a field if it has payloads
            // enabled in only some documents
            d.Add(NewTextField("f3", "this field has payloads in some docs", Field.Store.NO));
            // only add payload data for field f2
            analyzer.SetPayloadData("f2", "somedata".GetBytes(IOUtils.CHARSET_UTF_8), 0, 1);
            writer.AddDocument(d);
            // flush
            writer.Dispose();

            SegmentReader reader = GetOnlySegmentReader(DirectoryReader.Open(ram));
            FieldInfos fi = reader.FieldInfos;
            Assert.IsFalse(fi.FieldInfo("f1").HasPayloads(), "Payload field bit should not be set.");
            Assert.IsTrue(fi.FieldInfo("f2").HasPayloads(), "Payload field bit should be set.");
            Assert.IsFalse(fi.FieldInfo("f3").HasPayloads(), "Payload field bit should not be set.");
            reader.Dispose();

            // now we add another document which has payloads for field f3 and verify if the SegmentMerger
            // enabled payloads for that field
            analyzer = new PayloadAnalyzer(); // Clear payload state for each field
            writer = new IndexWriter(ram, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetOpenMode(OpenMode_e.CREATE));
            d = new Document();
            d.Add(NewTextField("f1", "this field has no payloads", Field.Store.NO));
            d.Add(NewTextField("f2", "this field has payloads in all docs", Field.Store.NO));
            d.Add(NewTextField("f2", "this field has payloads in all docs", Field.Store.NO));
            d.Add(NewTextField("f3", "this field has payloads in some docs", Field.Store.NO));
            // add payload data for field f2 and f3
            analyzer.SetPayloadData("f2", "somedata".GetBytes(IOUtils.CHARSET_UTF_8), 0, 1);
            analyzer.SetPayloadData("f3", "somedata".GetBytes(IOUtils.CHARSET_UTF_8), 0, 3);
            writer.AddDocument(d);

            // force merge
            writer.ForceMerge(1);
            // flush
            writer.Dispose();

            reader = GetOnlySegmentReader(DirectoryReader.Open(ram));
            fi = reader.FieldInfos;
            Assert.IsFalse(fi.FieldInfo("f1").HasPayloads(), "Payload field bit should not be set.");
            Assert.IsTrue(fi.FieldInfo("f2").HasPayloads(), "Payload field bit should be set.");
            Assert.IsTrue(fi.FieldInfo("f3").HasPayloads(), "Payload field bit should be set.");
            reader.Dispose();
            ram.Dispose();
        }
Example #30
 public static void Optimize()
 {
     var analyzer = new StandardAnalyzer(Version.LUCENE_30);
     using (var writer = new IndexWriter(Directory, analyzer, IndexWriter.MaxFieldLength.UNLIMITED))
     {
         // optimize first, then close the analyzer; the using block disposes the writer
         writer.Optimize();
         analyzer.Close();
     }
 }
        public void FixtureSetup()
        {
            directory = new RAMDirectory();
            customdAnalyzer = new GISAServer.Search.CustomAnalyzer.CustomAnalyzer();
            writer = new IndexWriter(directory, customdAnalyzer, IndexWriter.MaxFieldLength.UNLIMITED);
            AddDocuments(new List<string>() { "[Rosa]", "Rosa", "Olá" });
            writer.Dispose();

            searcher = new IndexSearcher(directory);
        }
Example #32
        public void Test_Index_ReusableStringReader()
        {
            Lucene.Net.Index.IndexWriter wr = new Lucene.Net.Index.IndexWriter(new Lucene.Net.Store.RAMDirectory(), new TestAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED);

            Lucene.Net.Documents.Document doc = new Lucene.Net.Documents.Document();
            Lucene.Net.Documents.Field    f1  = new Lucene.Net.Documents.Field("f1", TEST_STRING, Lucene.Net.Documents.Field.Store.YES, Lucene.Net.Documents.Field.Index.ANALYZED);
            doc.Add(f1);
            wr.AddDocument(doc);

            wr.Dispose();
        }
Example #33
        /// <summary>
        /// Save to Index
        /// </summary>
        /// <param name="item"></param>
        /// <param name="indexPath"></param>
        public static bool SaveToIndex(SiteSearchItem item, string indexPath)
        {
            bool isSuccess = false;

            try
            {
                //Delete First
                DeleteIndex(item, indexPath);

                if (item.IsDeleted)
                {
                    return true;
                }


                //Add to index
                var indexDirectory = FSDirectory.Open(new DirectoryInfo(indexPath));
                var analyzer       = new Lucene.Net.Analysis.Standard.StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);
                var writer         = new Lucene.Net.Index.IndexWriter(indexDirectory, analyzer, IndexWriter.MaxFieldLength.UNLIMITED);

                var doc = new Document();
                doc.Add(new Field("ID", item.ID, Field.Store.YES, Field.Index.NOT_ANALYZED));

                //Set Boost in title
                var titleField = new Field("Title", item.Title, Field.Store.YES, Field.Index.ANALYZED);
                titleField.Boost = 1.3f;
                doc.Add(titleField);

                //Set Boost in Description
                var descriptionField = new Field("Description", item.Description, Field.Store.YES, Field.Index.ANALYZED);
                descriptionField.Boost = 1.1f;
                doc.Add(descriptionField);

                //Default boost 1f in Document Description
                doc.Add(new Field("DocumentDescription", item.DocumentDescription, Field.Store.YES, Field.Index.ANALYZED));

                doc.Add(new Field("URL", item.URL, Field.Store.YES, Field.Index.NOT_ANALYZED));

                writer.AddDocument(doc);

                analyzer.Close();
                writer.Dispose();

                isSuccess = true;
            }
            catch (Exception ex)
            {
                ErrorLog.WriteLog("SiteSearchService", "SaveToIndex", ex, string.Empty);
            }

            return isSuccess;
        }
        protected virtual Task<bool> UpdateIndex(IEnumerable<TLuceneEntity> articles, Lucene.Net.Store.Directory directory)
        {
            if (!articles.Any())
            {
                return Task.FromResult(false);
            }

            var propertyInfo = typeof(TLuceneEntity).GetProperties().FirstOrDefault(p => p.CustomAttributes.Any(ca => ca.AttributeType == typeof(LuceneKeyAttribute)));
            string keyName = propertyInfo?.Name.ToLower();

            if (string.IsNullOrEmpty(keyName))
            {
                return Task.FromResult(true);
            }

            IndexWriter writer = new Lucene.Net.Index.IndexWriter(directory, new Lucene.Net.Index.IndexWriterConfig(Lucene.Net.Util.LuceneVersion.LUCENE_48, _analyzer));

            // Delete any existing document whose key matches one of the incoming entities.
            // One exact Term per key is more reliable than round-tripping a PhraseQuery
            // through the QueryParser, which would only match the keys as a phrase.
            Term[] keyTerms = articles
                .Select(item => new Term(keyName, propertyInfo.GetValue(item).ToString()))
                .ToArray();
            writer.DeleteDocuments(keyTerms);

            try
            {
                CreateDocument(articles, writer);
            }
            catch
            {
                Lucene.Net.Index.IndexWriter.Unlock(directory);
                throw;
            }
            finally
            {
                writer.Dispose();
            }
            return Task.FromResult(true);
        }
Example #35
        public void Test_Store_RAMDirectory()
        {
            Lucene.Net.Store.RAMDirectory ramDIR = new Lucene.Net.Store.RAMDirectory();

            //Index 1 Doc
            Lucene.Net.Analysis.Analyzer analyzer = new Lucene.Net.Analysis.Core.WhitespaceAnalyzer(Version.LUCENE_CURRENT);
            var conf = new IndexWriterConfig(Version.LUCENE_CURRENT, analyzer);

            Lucene.Net.Index.IndexWriter  wr  = new Lucene.Net.Index.IndexWriter(ramDIR, conf);
            Lucene.Net.Documents.Document doc = new Lucene.Net.Documents.Document();
            doc.Add(new Lucene.Net.Documents.Field("field1", "value1 value11", Lucene.Net.Documents.Field.Store.YES, Lucene.Net.Documents.Field.Index.ANALYZED));
            wr.AddDocument(doc);
            wr.Dispose();

            //now serialize it
            System.Runtime.Serialization.Formatters.Binary.BinaryFormatter serializer = new System.Runtime.Serialization.Formatters.Binary.BinaryFormatter();
            System.IO.MemoryStream memoryStream = new System.IO.MemoryStream();
            serializer.Serialize(memoryStream, ramDIR);

            //Close DIR
            ramDIR.Dispose();
            ramDIR = null;

            //now deserialize
            memoryStream.Seek(0, System.IO.SeekOrigin.Begin);
            Lucene.Net.Store.RAMDirectory ramDIR2 = (Lucene.Net.Store.RAMDirectory)serializer.Deserialize(memoryStream);

            //Add 1 more doc
            Lucene.Net.Analysis.Analyzer analyzer2 = new Lucene.Net.Analysis.Core.WhitespaceAnalyzer(Version.LUCENE_CURRENT);
            var conf2 = new IndexWriterConfig(Version.LUCENE_CURRENT, analyzer2);

            wr  = new Lucene.Net.Index.IndexWriter(ramDIR2, conf2);
            doc = new Lucene.Net.Documents.Document();
            doc.Add(new Lucene.Net.Documents.Field("field1", "value1 value11", Lucene.Net.Documents.Field.Store.YES, Lucene.Net.Documents.Field.Index.ANALYZED));
            wr.AddDocument(doc);
            wr.Dispose();

            Lucene.Net.Search.TopDocs topDocs;
            //Search
            using (var reader = DirectoryReader.Open(ramDIR2))
            {
                Lucene.Net.Search.IndexSearcher             s  = new Lucene.Net.Search.IndexSearcher(reader);
                Lucene.Net.QueryParsers.Classic.QueryParser qp = new Lucene.Net.QueryParsers.Classic.QueryParser(Version.LUCENE_CURRENT, "field1", new Lucene.Net.Analysis.Standard.StandardAnalyzer(Version.LUCENE_CURRENT));
                Lucene.Net.Search.Query q = qp.Parse("value1");
                topDocs = s.Search(q, 100);
            }

            Assert.AreEqual(2, topDocs.TotalHits, "See the issue: LUCENENET-174");
        }
Example #36
        void LUCENENET_100_CreateIndex()
        {
            Lucene.Net.Index.IndexWriter w = new Lucene.Net.Index.IndexWriter(LUCENENET_100_Dir, new Lucene.Net.Analysis.Standard.StandardAnalyzer(Version.LUCENE_CURRENT), true, IndexWriter.MaxFieldLength.UNLIMITED);

            Lucene.Net.Documents.Field    f1 = new Lucene.Net.Documents.Field("field1", "dark side of the moon", Lucene.Net.Documents.Field.Store.YES, Lucene.Net.Documents.Field.Index.ANALYZED);
            Lucene.Net.Documents.Field    f2 = new Lucene.Net.Documents.Field("field2", "123", Lucene.Net.Documents.Field.Store.YES, Lucene.Net.Documents.Field.Index.NOT_ANALYZED);
            Lucene.Net.Documents.Document d  = new Lucene.Net.Documents.Document();
            d.Add(f1);
            d.Add(f2);
            w.AddDocument(d);

            f1 = new Lucene.Net.Documents.Field("field1", "Fly me to the moon", Lucene.Net.Documents.Field.Store.YES, Lucene.Net.Documents.Field.Index.ANALYZED);
            f2 = new Lucene.Net.Documents.Field("field2", "456", Lucene.Net.Documents.Field.Store.YES, Lucene.Net.Documents.Field.Index.NOT_ANALYZED);
            d  = new Lucene.Net.Documents.Document();
            d.Add(f1);
            d.Add(f2);
            w.AddDocument(d);

            w.Dispose();
        }
Example #37
 public void SaveAndDispose()
 {
     if (analyzer != null)
     {
         analyzer.Dispose();
         analyzer = null;
     }
     if (writer != null)
     {
         writer.Optimize();
         writer.Commit();
         writer.Dispose();
         writer = null;
     }
     if (direc != null)
     {
         direc.Dispose();
         direc = null;
     }
 }
        private bool CreateIndex(IEnumerable<TLuceneEntity> articles, Lucene.Net.Store.Directory directory)
        {
            if (!articles.Any())
            {
                return false;
            }

            IndexWriter writer = new Lucene.Net.Index.IndexWriter(directory, new Lucene.Net.Index.IndexWriterConfig(Lucene.Net.Util.LuceneVersion.LUCENE_48, _analyzer));

            try
            {
                CreateDocument(articles, writer);
            }
            catch
            {
                Lucene.Net.Index.IndexWriter.Unlock(directory);
                throw;
            }
            finally
            {
                writer.Dispose();
            }
            return true;
        }
Example #39
        public virtual void TestOpenPriorSnapshot()
        {
            Directory dir = NewDirectory();

            IndexWriter           writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetIndexDeletionPolicy(new KeepAllDeletionPolicy(this, dir)).SetMaxBufferedDocs(2).SetMergePolicy(NewLogMergePolicy(10)));
            KeepAllDeletionPolicy policy = (KeepAllDeletionPolicy)writer.Config.IndexDeletionPolicy;

            for (int i = 0; i < 10; i++)
            {
                AddDoc(writer);
                if ((1 + i) % 2 == 0)
                {
                    writer.Commit();
                }
            }
            writer.Dispose();

            ICollection <IndexCommit> commits = DirectoryReader.ListCommits(dir);

            Assert.AreEqual(5, commits.Count);
            IndexCommit lastCommit = null;

            foreach (IndexCommit commit in commits)
            {
                if (lastCommit == null || commit.Generation > lastCommit.Generation)
                {
                    lastCommit = commit;
                }
            }
            Assert.IsTrue(lastCommit != null);

            // Now add 1 doc and merge
            writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetIndexDeletionPolicy(policy));
            AddDoc(writer);
            Assert.AreEqual(11, writer.NumDocs);
            writer.ForceMerge(1);
            writer.Dispose();

            Assert.AreEqual(6, DirectoryReader.ListCommits(dir).Count);

            // Now open writer on the commit just before merge:
            writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetIndexDeletionPolicy(policy).SetIndexCommit(lastCommit));
            Assert.AreEqual(10, writer.NumDocs);

            // Should undo our rollback:
            writer.Rollback();

            DirectoryReader r = DirectoryReader.Open(dir);

            // Still merged, still 11 docs
            Assert.AreEqual(1, r.Leaves.Count);
            Assert.AreEqual(11, r.NumDocs);
            r.Dispose();

            writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetIndexDeletionPolicy(policy).SetIndexCommit(lastCommit));
            Assert.AreEqual(10, writer.NumDocs);
            // Commits the rollback:
            writer.Dispose();

            // Now 7 because we made another commit
            Assert.AreEqual(7, DirectoryReader.ListCommits(dir).Count);

            r = DirectoryReader.Open(dir);
            // Not fully merged because we rolled it back, and now only
            // 10 docs
            Assert.IsTrue(r.Leaves.Count > 1);
            Assert.AreEqual(10, r.NumDocs);
            r.Dispose();

            // Re-merge
            writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetIndexDeletionPolicy(policy));
            writer.ForceMerge(1);
            writer.Dispose();

            r = DirectoryReader.Open(dir);
            Assert.AreEqual(1, r.Leaves.Count);
            Assert.AreEqual(10, r.NumDocs);
            r.Dispose();

            // Now open writer on the commit just before merging,
            // but this time keeping only the last commit:
            writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetIndexCommit(lastCommit));
            Assert.AreEqual(10, writer.NumDocs);

            // Reader still sees fully merged index, because writer
            // opened on the prior commit has not yet committed:
            r = DirectoryReader.Open(dir);
            Assert.AreEqual(1, r.Leaves.Count);
            Assert.AreEqual(10, r.NumDocs);
            r.Dispose();

            writer.Dispose();

            // Now reader sees not-fully-merged index:
            r = DirectoryReader.Open(dir);
            Assert.IsTrue(r.Leaves.Count > 1);
            Assert.AreEqual(10, r.NumDocs);
            r.Dispose();

            dir.Dispose();
        }
Example #40
 public void DisposeWriter()
 {
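     // Optimize() (ForceMerge in later versions) merges segments down to one;
     // Dispose() then commits buffered changes and releases the write lock.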
     Writer.Optimize();
     //Writer.Commit();
     Writer.Dispose();
 }
Example #41
        public virtual void TestAddDocumentOnDiskFull()
        {
            for (int pass = 0; pass < 2; pass++)
            {
                if (Verbose)
                {
                    Console.WriteLine("TEST: pass=" + pass);
                }
                bool doAbort  = pass == 1;
                long diskFree = TestUtil.NextInt32(Random, 100, 300);
                while (true)
                {
                    if (Verbose)
                    {
                        Console.WriteLine("TEST: cycle: diskFree=" + diskFree);
                    }
                    MockDirectoryWrapper dir = new MockDirectoryWrapper(Random, new RAMDirectory());
                    dir.MaxSizeInBytes = diskFree;
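                    // Cap the mock directory at diskFree bytes: writes past the
                    // limit throw IOException, simulating a full disk.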
                    IndexWriter     writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)));
                    IMergeScheduler ms     = writer.Config.MergeScheduler;
                    if (ms is IConcurrentMergeScheduler)
                    {
                        // this test intentionally produces exceptions
                        // in the threads that CMS launches; we don't
                        // want to pollute test output with these.
                        ((IConcurrentMergeScheduler)ms).SetSuppressExceptions();
                    }

                    bool hitError = false;
                    try
                    {
                        for (int i = 0; i < 200; i++)
                        {
                            AddDoc(writer);
                        }
                        if (Verbose)
                        {
                            Console.WriteLine("TEST: done adding docs; now commit");
                        }
                        writer.Commit();
                    }
                    catch (IOException e)
                    {
                        if (Verbose)
                        {
                            Console.WriteLine("TEST: exception on addDoc");
                            Console.WriteLine(e.StackTrace);
                        }
                        hitError = true;
                    }

                    if (hitError)
                    {
                        if (doAbort)
                        {
                            if (Verbose)
                            {
                                Console.WriteLine("TEST: now rollback");
                            }
                            writer.Rollback();
                        }
                        else
                        {
                            try
                            {
                                if (Verbose)
                                {
                                    Console.WriteLine("TEST: now close");
                                }
                                writer.Dispose();
                            }
                            catch (IOException e)
                            {
                                if (Verbose)
                                {
                                    Console.WriteLine("TEST: exception on close; retry w/ no disk space limit");
                                    Console.WriteLine(e.StackTrace);
                                }
                                dir.MaxSizeInBytes = 0;
                                writer.Dispose();
                            }
                        }

                        //TestUtil.SyncConcurrentMerges(ms);

                        if (TestUtil.AnyFilesExceptWriteLock(dir))
                        {
                            TestIndexWriter.AssertNoUnreferencedFiles(dir, "after disk full during addDocument");

                            // Make sure reader can open the index:
                            DirectoryReader.Open(dir).Dispose();
                        }

                        dir.Dispose();
                        // Now try again w/ more space:

                        diskFree += TestNightly ? TestUtil.NextInt32(Random, 400, 600) : TestUtil.NextInt32(Random, 3000, 5000);
                    }
                    else
                    {
                        //TestUtil.SyncConcurrentMerges(writer);
                        dir.MaxSizeInBytes = 0;
                        writer.Dispose();
                        dir.Dispose();
                        break;
                    }
                }
            }
        }
Example #42
 public void CleanUp() // Cleans up indexer
 {
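     // Flush all buffered state to disk before the final Dispose(); the three
     // bools presumably trigger merges and flush doc stores and deletes (3.x API).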
     writer.Optimize();
     writer.Flush(true, true, true);
     writer.Dispose();
 }
Example #43
        public virtual void TestExpirationTimeDeletionPolicy()
        {
            const double SECONDS = 2.0;
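            // The ExpirationTimeDeletionPolicy (defined elsewhere in this class)
            // deletes commit points once they are older than SECONDS seconds.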

            Directory         dir  = NewDirectory();
            IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetIndexDeletionPolicy(new ExpirationTimeDeletionPolicy(this, dir, SECONDS));
            MergePolicy       mp   = conf.MergePolicy;

            mp.NoCFSRatio = 1.0;
            IndexWriter writer = new IndexWriter(dir, conf);
            ExpirationTimeDeletionPolicy policy     = (ExpirationTimeDeletionPolicy)writer.Config.IndexDeletionPolicy;
            IDictionary <string, string> commitData = new Dictionary <string, string>();

            commitData["commitTime"] = Convert.ToString(J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond); // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results
            writer.SetCommitData(commitData);
            writer.Commit();
            writer.Dispose();

            long lastDeleteTime  = 0;
            int  targetNumDelete = TestUtil.NextInt32(Random, 1, 5);

            while (policy.numDelete < targetNumDelete)
            {
                // Record last time when writer performed deletes of
                // past commits
                lastDeleteTime = J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond; // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results
                conf           = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetOpenMode(OpenMode.APPEND).SetIndexDeletionPolicy(policy);
                mp             = conf.MergePolicy;
                mp.NoCFSRatio  = 1.0;
                writer         = new IndexWriter(dir, conf);
                policy         = (ExpirationTimeDeletionPolicy)writer.Config.IndexDeletionPolicy;
                for (int j = 0; j < 17; j++)
                {
                    AddDoc(writer);
                }
                commitData = new Dictionary <string, string>();
                commitData["commitTime"] = Convert.ToString(J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond); // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results
                writer.SetCommitData(commitData);
                writer.Commit();
                writer.Dispose();

                Thread.Sleep((int)(1000.0 * (SECONDS / 5.0)));
            }

            // Then simplistic check: just verify that the
            // segments_N's that still exist are in fact within SECONDS
            // seconds of the last one's mod time, and, that I can
            // open a reader on each:
            long gen = SegmentInfos.GetLastCommitGeneration(dir);

            string fileName = IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen);

            dir.DeleteFile(IndexFileNames.SEGMENTS_GEN);

            bool oneSecondResolution = true;

            while (gen > 0)
            {
                try
                {
                    IndexReader reader = DirectoryReader.Open(dir);
                    reader.Dispose();
                    fileName = IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen);

                    // if we are on a filesystem that seems to have only
                    // 1 second resolution, allow +1 second in commit
                    // age tolerance:
                    SegmentInfos sis = new SegmentInfos();
                    sis.Read(dir, fileName);
                    long modTime = Convert.ToInt64(sis.UserData["commitTime"]);
                    oneSecondResolution &= (modTime % 1000) == 0;
                    long leeway = (long)((SECONDS + (oneSecondResolution ? 1.0 : 0.0)) * 1000);

                    Assert.IsTrue(lastDeleteTime - modTime <= leeway, "commit point was older than " + SECONDS + " seconds (" + (lastDeleteTime - modTime) + " msec) but did not get deleted ");
                }
                catch (Exception e) when(e.IsIOException())
                {
                    // OK
                    break;
                }

                dir.DeleteFile(IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen));
                gen--;
            }

            dir.Dispose();
        }
Example #44
        public virtual void TestPositions()
        {
            Directory   ram      = NewDirectory();
            Analyzer    analyzer = new MockAnalyzer(Random);
            IndexWriter writer   = new IndexWriter(ram, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
            Document    d        = new Document();

            // f1,f2,f3: docs only
            FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);

            ft.IndexOptions = IndexOptions.DOCS_ONLY;
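            // DOCS_ONLY records which documents contain each term, but neither
            // term frequencies nor positions.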

            Field f1 = NewField("f1", "this field has docs only", ft);

            d.Add(f1);

            Field f2 = NewField("f2", "this field has docs only", ft);

            d.Add(f2);

            Field f3 = NewField("f3", "this field has docs only", ft);

            d.Add(f3);

            FieldType ft2 = new FieldType(TextField.TYPE_NOT_STORED);

            ft2.IndexOptions = IndexOptions.DOCS_AND_FREQS;

            // f4,f5,f6 docs and freqs
            Field f4 = NewField("f4", "this field has docs and freqs", ft2);

            d.Add(f4);

            Field f5 = NewField("f5", "this field has docs and freqs", ft2);

            d.Add(f5);

            Field f6 = NewField("f6", "this field has docs and freqs", ft2);

            d.Add(f6);

            FieldType ft3 = new FieldType(TextField.TYPE_NOT_STORED);

            ft3.IndexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;

            // f7,f8,f9 docs/freqs/positions
            Field f7 = NewField("f7", "this field has docs and freqs and positions", ft3);

            d.Add(f7);

            Field f8 = NewField("f8", "this field has docs and freqs and positions", ft3);

            d.Add(f8);

            Field f9 = NewField("f9", "this field has docs and freqs and positions", ft3);

            d.Add(f9);

            writer.AddDocument(d);
            writer.ForceMerge(1);

            // now we add another document which has docs-only for f1, f4, f7, docs/freqs for f2, f5, f8,
            // and docs/freqs/positions for f3, f6, f9
            d = new Document();

            // f1,f4,f7: docs only
            f1 = NewField("f1", "this field has docs only", ft);
            d.Add(f1);

            f4 = NewField("f4", "this field has docs only", ft);
            d.Add(f4);

            f7 = NewField("f7", "this field has docs only", ft);
            d.Add(f7);

            // f2, f5, f8: docs and freqs
            f2 = NewField("f2", "this field has docs and freqs", ft2);
            d.Add(f2);

            f5 = NewField("f5", "this field has docs and freqs", ft2);
            d.Add(f5);

            f8 = NewField("f8", "this field has docs and freqs", ft2);
            d.Add(f8);

            // f3, f6, f9: docs and freqs and positions
            f3 = NewField("f3", "this field has docs and freqs and positions", ft3);
            d.Add(f3);

            f6 = NewField("f6", "this field has docs and freqs and positions", ft3);
            d.Add(f6);

            f9 = NewField("f9", "this field has docs and freqs and positions", ft3);
            d.Add(f9);

            writer.AddDocument(d);

            // force merge
            writer.ForceMerge(1);
            // flush
            writer.Dispose();

            SegmentReader reader = GetOnlySegmentReader(DirectoryReader.Open(ram));
            FieldInfos    fi     = reader.FieldInfos;

            // docs + docs = docs
            Assert.AreEqual(IndexOptions.DOCS_ONLY, fi.FieldInfo("f1").IndexOptions);
            // docs + docs/freqs = docs
            Assert.AreEqual(IndexOptions.DOCS_ONLY, fi.FieldInfo("f2").IndexOptions);
            // docs + docs/freqs/pos = docs
            Assert.AreEqual(IndexOptions.DOCS_ONLY, fi.FieldInfo("f3").IndexOptions);
            // docs/freqs + docs = docs
            Assert.AreEqual(IndexOptions.DOCS_ONLY, fi.FieldInfo("f4").IndexOptions);
            // docs/freqs + docs/freqs = docs/freqs
            Assert.AreEqual(IndexOptions.DOCS_AND_FREQS, fi.FieldInfo("f5").IndexOptions);
            // docs/freqs + docs/freqs/pos = docs/freqs
            Assert.AreEqual(IndexOptions.DOCS_AND_FREQS, fi.FieldInfo("f6").IndexOptions);
            // docs/freqs/pos + docs = docs
            Assert.AreEqual(IndexOptions.DOCS_ONLY, fi.FieldInfo("f7").IndexOptions);
            // docs/freqs/pos + docs/freqs = docs/freqs
            Assert.AreEqual(IndexOptions.DOCS_AND_FREQS, fi.FieldInfo("f8").IndexOptions);
            // docs/freqs/pos + docs/freqs/pos = docs/freqs/pos
            Assert.AreEqual(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS, fi.FieldInfo("f9").IndexOptions);

            reader.Dispose();
            ram.Dispose();
        }
Example #45
        // Runs the test with multiple threads, using the specified
        // failure to trigger an IOException
        public virtual void TestMultipleThreadsFailure(Func <IConcurrentMergeScheduler> newScheduler, MockDirectoryWrapper.Failure failure)
        {
            int NUM_THREADS = 3;

            for (int iter = 0; iter < 2; iter++)
            {
                if (VERBOSE)
                {
                    Console.WriteLine("TEST: iter=" + iter);
                }
                MockDirectoryWrapper dir = NewMockDirectory();
                var config = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))
                             .SetMaxBufferedDocs(2)
                             .SetMergeScheduler(newScheduler())
                             .SetMergePolicy(NewLogMergePolicy(4));
                IndexWriter writer    = new IndexWriter(dir, config);
                var         scheduler = config.MergeScheduler as IConcurrentMergeScheduler;
                if (scheduler != null)
                {
                    scheduler.SetSuppressExceptions();
                }

                IndexerThread[] threads = new IndexerThread[NUM_THREADS];

                for (int i = 0; i < NUM_THREADS; i++)
                {
                    threads[i] = new IndexerThread(writer, true, NewField);
                }

                for (int i = 0; i < NUM_THREADS; i++)
                {
                    threads[i].Start();
                }

                Thread.Sleep(10);

                dir.FailOn(failure);
                failure.SetDoFail();
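                // Arm the injected failure: from here on, MockDirectoryWrapper
                // throws the configured IOException inside the indexing threads.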

                for (int i = 0; i < NUM_THREADS; i++)
                {
                    threads[i].Join();
                    Assert.IsTrue(threads[i].Error == null, "hit unexpected Throwable");
                }

                bool success = false;
                try
                {
                    writer.Dispose(false);
                    success = true;
                }
                catch (IOException)
                {
                    failure.ClearDoFail();
                    writer.Dispose(false);
                }
                if (VERBOSE)
                {
                    Console.WriteLine("TEST: success=" + success);
                }

                if (success)
                {
                    IndexReader reader  = DirectoryReader.Open(dir);
                    IBits       delDocs = MultiFields.GetLiveDocs(reader);
                    for (int j = 0; j < reader.MaxDoc; j++)
                    {
                        if (delDocs == null || !delDocs.Get(j))
                        {
                            reader.Document(j);
                            reader.GetTermVectors(j);
                        }
                    }
                    reader.Dispose();
                }

                dir.Dispose();
            }
        }
Example #46
        public virtual void TestDeletes1()
        {
            //IndexWriter.debug2 = System.out;
            Directory         dir = new MockDirectoryWrapper(new Random(Random().Next()), new RAMDirectory());
            IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));

            iwc.SetMergeScheduler(new SerialMergeScheduler());
            iwc.SetMaxBufferedDocs(5000);
            iwc.SetRAMBufferSizeMB(100);
            RangeMergePolicy fsmp = new RangeMergePolicy(this, false);

            iwc.SetMergePolicy(fsmp);
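            // RangeMergePolicy is a test-local policy that merges only the segment
            // range [Start, Start + Length) and only while DoMerge is set (see below).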
            IndexWriter writer = new IndexWriter(dir, iwc);

            for (int x = 0; x < 5; x++)
            {
                writer.AddDocument(DocHelper.CreateDocument(x, "1", 2));
                //System.out.println("numRamDocs(" + x + ")" + writer.numRamDocs());
            }
            //System.out.println("commit1");
            writer.Commit();
            Assert.AreEqual(1, writer.SegmentCount);
            for (int x = 5; x < 10; x++)
            {
                writer.AddDocument(DocHelper.CreateDocument(x, "2", 2));
                //System.out.println("numRamDocs(" + x + ")" + writer.numRamDocs());
            }
            //System.out.println("commit2");
            writer.Commit();
            Assert.AreEqual(2, writer.SegmentCount);

            for (int x = 10; x < 15; x++)
            {
                writer.AddDocument(DocHelper.CreateDocument(x, "3", 2));
                //System.out.println("numRamDocs(" + x + ")" + writer.numRamDocs());
            }

            writer.DeleteDocuments(new Term("id", "1"));

            writer.DeleteDocuments(new Term("id", "11"));

            // flushing without applying deletes means
            // there will still be deletes in the segment infos
            writer.Flush(false, false);
            Assert.IsTrue(writer.bufferedUpdatesStream.Any());

            // get reader flushes pending deletes
            // so there should not be anymore
            IndexReader r1 = writer.GetReader();

            Assert.IsFalse(writer.bufferedUpdatesStream.Any());
            r1.Dispose();

            // delete id:2 from the first segment
            // merge segments 0 and 1
            // which should apply the delete id:2
            writer.DeleteDocuments(new Term("id", "2"));
            writer.Flush(false, false);
            fsmp         = (RangeMergePolicy)writer.Config.MergePolicy;
            fsmp.DoMerge = true;
            fsmp.Start   = 0;
            fsmp.Length  = 2;
            writer.MaybeMerge();

            Assert.AreEqual(2, writer.SegmentCount);

            // id:2 shouldn't exist anymore because
            // it's been applied in the merge and now it's gone
            IndexReader r2 = writer.GetReader();

            int[] id2docs = ToDocsArray(new Term("id", "2"), null, r2);
            Assert.IsTrue(id2docs == null);
            r2.Dispose();

            /*
             * /// // added docs are in the ram buffer
             * /// for (int x = 15; x < 20; x++) {
             * ///  writer.AddDocument(TestIndexWriterReader.CreateDocument(x, "4", 2));
             * ///  System.out.println("numRamDocs(" + x + ")" + writer.numRamDocs());
             * /// }
             * /// Assert.IsTrue(writer.numRamDocs() > 0);
             * /// // delete from the ram buffer
             * /// writer.DeleteDocuments(new Term("id", Integer.toString(13)));
             * ///
             * /// Term id3 = new Term("id", Integer.toString(3));
             * ///
             * /// // delete from the 1st segment
             * /// writer.DeleteDocuments(id3);
             * ///
             * /// Assert.IsTrue(writer.numRamDocs() > 0);
             * ///
             * /// //System.out
             * /// //    .println("segdels1:" + writer.docWriter.deletesToString());
             * ///
             * /// //Assert.IsTrue(writer.docWriter.segmentDeletes.Size() > 0);
             * ///
             * /// // we cause a merge to happen
             * /// fsmp.doMerge = true;
             * /// fsmp.start = 0;
             * /// fsmp.Length = 2;
             * /// System.out.println("maybeMerge "+writer.SegmentInfos);
             * ///
             * /// SegmentInfo info0 = writer.SegmentInfos.Info(0);
             * /// SegmentInfo info1 = writer.SegmentInfos.Info(1);
             * ///
             * /// writer.MaybeMerge();
             * /// System.out.println("maybeMerge after "+writer.SegmentInfos);
             * /// // there should be docs in RAM
             * /// Assert.IsTrue(writer.numRamDocs() > 0);
             * ///
             * /// // assert we've merged the 1 and 2 segments
             * /// // and still have a segment leftover == 2
             * /// Assert.AreEqual(2, writer.SegmentInfos.Size());
             * /// Assert.IsFalse(segThere(info0, writer.SegmentInfos));
             * /// Assert.IsFalse(segThere(info1, writer.SegmentInfos));
             * ///
             * /// //System.out.println("segdels2:" + writer.docWriter.deletesToString());
             * ///
             * /// //Assert.IsTrue(writer.docWriter.segmentDeletes.Size() > 0);
             * ///
             * /// IndexReader r = writer.GetReader();
             * /// IndexReader r1 = r.getSequentialSubReaders()[0];
             * /// printDelDocs(r1.GetLiveDocs());
             * /// int[] docs = toDocsArray(id3, null, r);
             * /// System.out.println("id3 docs:"+Arrays.toString(docs));
             * /// // there shouldn't be any docs for id:3
             * /// Assert.IsTrue(docs == null);
             * /// r.Dispose();
             * ///
             * /// part2(writer, fsmp);
             * ///
             */
            // System.out.println("segdels2:"+writer.docWriter.segmentDeletes.toString());
            //System.out.println("close");
            writer.Dispose();
            dir.Dispose();
        }
Example #47
        public virtual void TestKeepAllDeletionPolicy()
        {
            for (int pass = 0; pass < 2; pass++)
            {
                if (Verbose)
                {
                    Console.WriteLine("TEST: cycle pass=" + pass);
                }

                bool useCompoundFile = (pass % 2) != 0;

                Directory dir = NewDirectory();

                IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetIndexDeletionPolicy(new KeepAllDeletionPolicy(this, dir)).SetMaxBufferedDocs(10).SetMergeScheduler(new SerialMergeScheduler());
                MergePolicy       mp   = conf.MergePolicy;

                mp.NoCFSRatio = useCompoundFile ? 1.0 : 0.0;
                IndexWriter           writer = new IndexWriter(dir, conf);
                KeepAllDeletionPolicy policy = (KeepAllDeletionPolicy)writer.Config.IndexDeletionPolicy;
                for (int i = 0; i < 107; i++)
                {
                    AddDoc(writer);
                }
                writer.Dispose();

                bool needsMerging;
                {
                    DirectoryReader r = DirectoryReader.Open(dir);
                    needsMerging = r.Leaves.Count != 1;
                    r.Dispose();
                }

                if (needsMerging)
                {
                    conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetOpenMode(OpenMode.APPEND).SetIndexDeletionPolicy(policy);
                    mp   = conf.MergePolicy;
                    mp.NoCFSRatio = useCompoundFile ? 1.0 : 0.0;
                    if (Verbose)
                    {
                        Console.WriteLine("TEST: open writer for forceMerge");
                    }
                    writer = new IndexWriter(dir, conf);
                    policy = (KeepAllDeletionPolicy)writer.Config.IndexDeletionPolicy;
                    writer.ForceMerge(1);
                    writer.Dispose();
                }

                Assert.AreEqual(needsMerging ? 2 : 1, policy.numOnInit);

                // If we are not auto committing then there should
                // be exactly 2 commits (one per close above):
                Assert.AreEqual(1 + (needsMerging ? 1 : 0), policy.numOnCommit);

                // Test listCommits
                ICollection <IndexCommit> commits = DirectoryReader.ListCommits(dir);
                // 2 from closing writer
                Assert.AreEqual(1 + (needsMerging ? 1 : 0), commits.Count);

                // Make sure we can open a reader on each commit:
                foreach (IndexCommit commit in commits)
                {
                    IndexReader r = DirectoryReader.Open(commit);
                    r.Dispose();
                }

                // Simplistic check: just verify all segments_N's still
                // exist, and, I can open a reader on each:
                dir.DeleteFile(IndexFileNames.SEGMENTS_GEN);
                long gen = SegmentInfos.GetLastCommitGeneration(dir);
                while (gen > 0)
                {
                    IndexReader reader = DirectoryReader.Open(dir);
                    reader.Dispose();
                    dir.DeleteFile(IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen));
                    gen--;

                    if (gen > 0)
                    {
                        // We've just removed a commit point, which should have
                        // orphaned at least one index file. Open & close a writer
                        // and assert that it actually removed something:
                        int preCount = dir.ListAll().Length;
                        writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetOpenMode(OpenMode.APPEND).SetIndexDeletionPolicy(policy));
                        writer.Dispose();
                        int postCount = dir.ListAll().Length;
                        Assert.IsTrue(postCount < preCount);
                    }
                }

                dir.Dispose();
            }
        }
Example #48
        public virtual void TestDeleteLeftoverFiles()
        {
            Directory dir = NewDirectory();

            if (dir is MockDirectoryWrapper)
            {
                ((MockDirectoryWrapper)dir).PreventDoubleWrite = false;
            }

            MergePolicy mergePolicy = NewLogMergePolicy(true, 10);

            // this test expects all of its segments to be in CFS
            mergePolicy.NoCFSRatio          = 1.0;
            mergePolicy.MaxCFSSegmentSizeMB = double.PositiveInfinity;

            IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetMaxBufferedDocs(10).SetMergePolicy(mergePolicy).SetUseCompoundFile(true));

            int i;

            for (i = 0; i < 35; i++)
            {
                AddDoc(writer, i);
            }
            writer.Config.MergePolicy.NoCFSRatio = 0.0;
            writer.Config.SetUseCompoundFile(false);
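            // Switch off compound files mid-stream so the index ends up with a mix
            // of CFS and non-CFS segments.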
            for (; i < 45; i++)
            {
                AddDoc(writer, i);
            }
            writer.Dispose();

            // Delete one doc so we get a .del file:
            writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetMergePolicy(NoMergePolicy.NO_COMPOUND_FILES).SetUseCompoundFile(true));
            Term searchTerm = new Term("id", "7");

            writer.DeleteDocuments(searchTerm);
            writer.Dispose();

            // Now, artificially create an extra .del file & extra
            // .s0 file:
            string[] files = dir.ListAll();

            /*
             * for(int j=0;j<files.Length;j++) {
             * System.out.println(j + ": " + files[j]);
             * }
             */

            // TODO: fix this test better
            string ext = Codec.Default.Name.Equals("SimpleText", StringComparison.Ordinal) ? ".liv" : ".del";

            // Create a bogus separate del file for a
            // segment that already has a separate del file:
            CopyFile(dir, "_0_1" + ext, "_0_2" + ext);

            // Create a bogus separate del file for a
            // segment that does not yet have a separate del file:
            CopyFile(dir, "_0_1" + ext, "_1_1" + ext);

            // Create a bogus separate del file for a
            // non-existent segment:
            CopyFile(dir, "_0_1" + ext, "_188_1" + ext);

            // Create a bogus segment file:
            CopyFile(dir, "_0.cfs", "_188.cfs");

            // Create a bogus fnm file when the CFS already exists:
            CopyFile(dir, "_0.cfs", "_0.fnm");

            // Create some old segments file:
            CopyFile(dir, "segments_2", "segments");
            CopyFile(dir, "segments_2", "segments_1");

            // Create a bogus cfs file shadowing a non-cfs segment:

            // TODO: assert is bogus (relies upon codec-specific filenames)
            Assert.IsTrue(SlowFileExists(dir, "_3.fdt") || SlowFileExists(dir, "_3.fld"));
            Assert.IsTrue(!SlowFileExists(dir, "_3.cfs"));
            CopyFile(dir, "_1.cfs", "_3.cfs");

            string[] filesPre = dir.ListAll();

            // Open & close a writer: it should delete the bogus files
            // created above and nothing more:
            writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetOpenMode(OpenMode.APPEND));
            writer.Dispose();

            string[] files2 = dir.ListAll();
            dir.Dispose();

            Array.Sort(files);
            Array.Sort(files2);

            HashSet <string> dif = DifFiles(files, files2);

            if (!Arrays.Equals(files, files2))
            {
                Assert.Fail("IndexFileDeleter failed to delete unreferenced extra files: should have deleted " + (filesPre.Length - files.Length) + " files but only deleted " + (filesPre.Length - files2.Length) + "; expected files:\n    " + AsString(files) + "\n  actual files:\n    " + AsString(files2) + "\ndiff: " + dif);
            }
        }
Example #49
        public virtual void TestBasic()
        {
            Directory   dir      = NewDirectory();
            Analyzer    analyzer = new MockAnalyzer(Random);
            IndexWriter writer   = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetMaxBufferedDocs(2).SetSimilarity(new SimpleSimilarity()).SetMergePolicy(NewLogMergePolicy(2)));

            StringBuilder sb   = new StringBuilder(265);
            string        term = "term";
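            // Each doc repeats "term" once more than the previous one, so term
            // frequency grows per doc; the "noTf" field (presumably indexed with
            // frequencies omitted via omitType) cannot see that difference.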

            for (int i = 0; i < 30; i++)
            {
                Document doc = new Document();
                sb.Append(term).Append(" ");
                string content = sb.ToString();
                Field  noTf    = NewField("noTf", content + (i % 2 == 0 ? "" : " notf"), omitType);
                doc.Add(noTf);

                Field tf = NewField("tf", content + (i % 2 == 0 ? " tf" : ""), normalType);
                doc.Add(tf);

                writer.AddDocument(doc);
                //System.out.println(d);
            }

            writer.ForceMerge(1);
            // flush
            writer.Dispose();

            /*
             * Verify the index
             */
            IndexReader   reader   = DirectoryReader.Open(dir);
            IndexSearcher searcher = NewSearcher(reader);

            searcher.Similarity = new SimpleSimilarity();

            Term      a  = new Term("noTf", term);
            Term      b  = new Term("tf", term);
            Term      c  = new Term("noTf", "notf");
            Term      d  = new Term("tf", "tf");
            TermQuery q1 = new TermQuery(a);
            TermQuery q2 = new TermQuery(b);
            TermQuery q3 = new TermQuery(c);
            TermQuery q4 = new TermQuery(d);

            PhraseQuery pq = new PhraseQuery();

            pq.Add(a);
            pq.Add(c);
            try
            {
                searcher.Search(pq, 10);
                Assert.Fail("did not hit expected exception");
            }
            catch (Exception e)
            {
                Exception cause = e;
                // If the searcher uses an executor service, the IAE is wrapped into other exceptions
                while (cause.InnerException != null)
                {
                    cause = cause.InnerException;
                }
                if (!(cause is InvalidOperationException))
                {
                    throw new InvalidOperationException("Expected an IAE", e);
                } // else OK because positions are not indexed
            }

            searcher.Search(q1, new CountingHitCollectorAnonymousInnerClassHelper(this));
            //System.out.println(CountingHitCollector.getCount());

            searcher.Search(q2, new CountingHitCollectorAnonymousInnerClassHelper2(this));
            //System.out.println(CountingHitCollector.getCount());

            searcher.Search(q3, new CountingHitCollectorAnonymousInnerClassHelper3(this));
            //System.out.println(CountingHitCollector.getCount());

            searcher.Search(q4, new CountingHitCollectorAnonymousInnerClassHelper4(this));
            //System.out.println(CountingHitCollector.getCount());

            BooleanQuery bq = new BooleanQuery();

            bq.Add(q1, Occur.MUST);
            bq.Add(q4, Occur.MUST);

            searcher.Search(bq, new CountingHitCollectorAnonymousInnerClassHelper5(this));
            Assert.AreEqual(15, CountingHitCollector.Count);

            reader.Dispose();
            dir.Dispose();
        }
Example #50
        public virtual void Test([ValueSource(typeof(ConcurrentMergeSchedulerFactories), "Values")] Func <IConcurrentMergeScheduler> newScheduler)
        {
            MockDirectoryWrapper dir = new MockDirectoryWrapper(Random(), new MMapDirectory(CreateTempDir("4GBStoredFields")));

            dir.Throttling = MockDirectoryWrapper.Throttling_e.NEVER;

            var config = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))
                         .SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH)
                         .SetRAMBufferSizeMB(256.0)
                         .SetMergeScheduler(newScheduler())
                         .SetMergePolicy(NewLogMergePolicy(false, 10))
                         .SetOpenMode(OpenMode.CREATE);
            IndexWriter w = new IndexWriter(dir, config);

            MergePolicy mp = w.Config.MergePolicy;

            if (mp is LogByteSizeMergePolicy)
            {
                // 1 petabyte:
                ((LogByteSizeMergePolicy)mp).MaxMergeMB = 1024 * 1024 * 1024;
            }

            Document  doc = new Document();
            FieldType ft  = new FieldType();

            ft.IsIndexed = false;
            ft.IsStored  = true;
            ft.Freeze();
            int valueLength = RandomInts.NextIntBetween(Random(), 1 << 13, 1 << 20);
            var value       = new byte[valueLength];

            for (int i = 0; i < valueLength; ++i)
            {
                // random so that even compressing codecs can't compress it
                value[i] = (byte)Random().Next(256);
            }
            Field f = new Field("fld", value, ft);

            doc.Add(f);

            int numDocs = (int)((1L << 32) / valueLength + 100);
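            // Enough documents that the total stored-field bytes exceed 2^32
            // (4 GB), plus 100 docs of slack.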

            for (int i = 0; i < numDocs; ++i)
            {
                w.AddDocument(doc);
                if (VERBOSE && i % (numDocs / 10) == 0)
                {
                    Console.WriteLine(i + " of " + numDocs + "...");
                }
            }
            w.ForceMerge(1);
            w.Dispose();
            if (VERBOSE)
            {
                bool found = false;
                foreach (string file in dir.ListAll())
                {
                    if (file.EndsWith(".fdt", StringComparison.Ordinal))
                    {
                        long fileLength = dir.FileLength(file);
                        if (fileLength >= 1L << 32)
                        {
                            found = true;
                        }
                        Console.WriteLine("File length of " + file + " : " + fileLength);
                    }
                }
                if (!found)
                {
                    Console.WriteLine("No .fdt file larger than 4GB, test bug?");
                }
            }

            DirectoryReader rd = DirectoryReader.Open(dir);
            Document        sd = rd.Document(numDocs - 1);

            Assert.IsNotNull(sd);
            Assert.AreEqual(1, sd.Fields.Count);
            BytesRef valueRef = sd.GetBinaryValue("fld");

            Assert.IsNotNull(valueRef);
            Assert.AreEqual(new BytesRef(value), valueRef);
            rd.Dispose();

            dir.Dispose();
        }
Example #51
        public virtual void Test([ValueSource(typeof(ConcurrentMergeSchedulerFactories), "Values")] Func <IConcurrentMergeScheduler> newScheduler)
        {
            BaseDirectoryWrapper dir = NewFSDirectory(CreateTempDir("2BPostingsBytes1"));

            if (dir is MockDirectoryWrapper)
            {
                ((MockDirectoryWrapper)dir).Throttling = MockDirectoryWrapper.Throttling_e.NEVER;
            }

            var config = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))
                         .SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH)
                         .SetRAMBufferSizeMB(256.0)
                         .SetMergeScheduler(newScheduler())
                         .SetMergePolicy(NewLogMergePolicy(false, 10))
                         .SetOpenMode(OpenMode.CREATE);
            IndexWriter w = new IndexWriter(dir, config);

            MergePolicy mp = w.Config.MergePolicy;

            if (mp is LogByteSizeMergePolicy)
            {
                // 1 petabyte:
                ((LogByteSizeMergePolicy)mp).MaxMergeMB = 1024 * 1024 * 1024;
            }

            Document  doc = new Document();
            FieldType ft  = new FieldType(TextField.TYPE_NOT_STORED);

            ft.IndexOptions = IndexOptions.DOCS_AND_FREQS;
            ft.OmitNorms    = true;
            MyTokenStream tokenStream = new MyTokenStream();
            Field         field       = new Field("field", tokenStream, ft);

            doc.Add(field);

            const int numDocs = 1000;

            for (int i = 0; i < numDocs; i++)
            {
                if (i % 2 == 1) // trick blockPF's little optimization
                {
                    tokenStream.n = 65536;
                }
                else
                {
                    tokenStream.n = 65537;
                }
                w.AddDocument(doc);
            }
            w.ForceMerge(1);
            w.Dispose();

            DirectoryReader oneThousand = DirectoryReader.Open(dir);

            IndexReader[] subReaders = new IndexReader[1000];
            Arrays.Fill(subReaders, oneThousand);
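            // Reuse the same 1000-doc reader 1000 times via MultiReader, giving
            // ~1M docs' worth of postings to copy without re-indexing.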
            MultiReader          mr   = new MultiReader(subReaders);
            BaseDirectoryWrapper dir2 = NewFSDirectory(CreateTempDir("2BPostingsBytes2"));

            if (dir2 is MockDirectoryWrapper)
            {
                ((MockDirectoryWrapper)dir2).Throttling = MockDirectoryWrapper.Throttling_e.NEVER;
            }
            IndexWriter w2 = new IndexWriter(dir2, new IndexWriterConfig(TEST_VERSION_CURRENT, null));

            w2.AddIndexes(mr);
            w2.ForceMerge(1);
            w2.Dispose();
            oneThousand.Dispose();

            DirectoryReader oneMillion = DirectoryReader.Open(dir2);

            subReaders = new IndexReader[2000];
            Arrays.Fill(subReaders, oneMillion);
            mr = new MultiReader(subReaders);
            BaseDirectoryWrapper dir3 = NewFSDirectory(CreateTempDir("2BPostingsBytes3"));

            if (dir3 is MockDirectoryWrapper)
            {
                ((MockDirectoryWrapper)dir3).Throttling = MockDirectoryWrapper.Throttling_e.NEVER;
            }
            IndexWriter w3 = new IndexWriter(dir3, new IndexWriterConfig(TEST_VERSION_CURRENT, null));

            w3.AddIndexes(mr);
            w3.ForceMerge(1);
            w3.Dispose();
            oneMillion.Dispose();

            dir.Dispose();
            dir2.Dispose();
            dir3.Dispose();
        }
Example #52
        public virtual void TestKeepLastNDeletionPolicy()
        {
            const int N = 5;
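            // The policy keeps only the last N commits; older segments_N files
            // must be gone, which the loop at the end verifies.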

            for (int pass = 0; pass < 2; pass++)
            {
                bool useCompoundFile = (pass % 2) != 0;

                Directory dir = NewDirectory();

                KeepLastNDeletionPolicy policy = new KeepLastNDeletionPolicy(this, N);
                for (int j = 0; j < N + 1; j++)
                {
                    IndexWriterConfig conf = (IndexWriterConfig)NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetOpenMode(OpenMode.CREATE).SetIndexDeletionPolicy(policy).SetMaxBufferedDocs(10);
                    MergePolicy       mp   = conf.MergePolicy;
                    mp.NoCFSRatio = useCompoundFile ? 1.0 : 0.0;
                    IndexWriter writer = new IndexWriter(dir, conf);
                    policy = (KeepLastNDeletionPolicy)writer.Config.IndexDeletionPolicy;
                    for (int i = 0; i < 17; i++)
                    {
                        AddDoc(writer);
                    }
                    writer.ForceMerge(1);
                    writer.Dispose();
                }

                Assert.IsTrue(policy.numDelete > 0);
                Assert.AreEqual(N + 1, policy.numOnInit);
                Assert.AreEqual(N + 1, policy.numOnCommit);

                // Simplistic check: just verify only the past N segments_N's still
                // exist, and, I can open a reader on each:
                dir.DeleteFile(IndexFileNames.SEGMENTS_GEN);
                long gen = SegmentInfos.GetLastCommitGeneration(dir);
                for (int i = 0; i < N + 1; i++)
                {
                    try
                    {
                        IndexReader reader = DirectoryReader.Open(dir);
                        reader.Dispose();
                        if (i == N)
                        {
                            Assert.Fail("should have failed on commits prior to last " + N);
                        }
                    }
                    catch (Exception e) when(e.IsIOException())
                    {
                        if (i != N)
                        {
                            throw; // LUCENENET: CA2200: Rethrow to preserve stack details (https://docs.microsoft.com/en-us/visualstudio/code-quality/ca2200-rethrow-to-preserve-stack-details)
                        }
                    }
                    if (i < N)
                    {
                        dir.DeleteFile(IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen));
                    }
                    gen--;
                }

                dir.Dispose();
            }
        }
Example #53
        public virtual void TestForceMergeDeletes2()
        {
            Directory   dir    = NewDirectory();
            IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetMaxBufferedDocs(2).SetRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH).SetMergePolicy(NewLogMergePolicy(50)));

            Document document = new Document();

            FieldType customType = new FieldType();

            customType.IsStored = true;

            FieldType customType1 = new FieldType(TextField.TYPE_NOT_STORED);

            customType1.IsTokenized              = false;
            customType1.StoreTermVectors         = true;
            customType1.StoreTermVectorPositions = true;
            customType1.StoreTermVectorOffsets   = true;

            Field storedField = NewField("stored", "stored", customType);

            document.Add(storedField);
            Field termVectorField = NewField("termVector", "termVector", customType1);

            document.Add(termVectorField);
            Field idField = NewStringField("id", "", Field.Store.NO);

            document.Add(idField);
            for (int i = 0; i < 98; i++)
            {
                idField.SetStringValue("" + i);
                writer.AddDocument(document);
            }
            writer.Dispose();

            IndexReader ir = DirectoryReader.Open(dir);

            Assert.AreEqual(98, ir.MaxDoc);
            Assert.AreEqual(98, ir.NumDocs);
            ir.Dispose();

            IndexWriterConfig dontMergeConfig = (new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))).SetMergePolicy(NoMergePolicy.COMPOUND_FILES);

            writer = new IndexWriter(dir, dontMergeConfig);
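            // Delete every other doc under NoMergePolicy so the deletes are
            // recorded but no merge reclaims them yet.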
            for (int i = 0; i < 98; i += 2)
            {
                writer.DeleteDocuments(new Term("id", "" + i));
            }
            writer.Dispose();

            ir = DirectoryReader.Open(dir);
            Assert.AreEqual(49, ir.NumDocs);
            ir.Dispose();

            writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetMergePolicy(NewLogMergePolicy(3)));
            Assert.AreEqual(49, writer.NumDocs);
            writer.ForceMergeDeletes();
            writer.Dispose();
            ir = DirectoryReader.Open(dir);
            Assert.AreEqual(49, ir.MaxDoc);
            Assert.AreEqual(49, ir.NumDocs);
            ir.Dispose();
            dir.Dispose();
        }
Example #54
        public virtual void TestAddIndexOnDiskFull()
        {
            // MemoryCodec, since it uses FST, is not necessarily
            // "additive", ie if you add up N small FSTs, then merge
            // them, the merged result can easily be larger than the
            // sum because the merged FST may use array encoding for
            // some arcs (which uses more space):

            string idFormat      = TestUtil.GetPostingsFormat("id");
            string contentFormat = TestUtil.GetPostingsFormat("content");

            AssumeFalse("this test cannot run with Memory codec", idFormat.Equals("Memory", StringComparison.Ordinal) || contentFormat.Equals("Memory", StringComparison.Ordinal));

            int START_COUNT = 57;
            int NUM_DIR     = TestNightly ? 50 : 5;
            int END_COUNT   = START_COUNT + NUM_DIR * (TestNightly ? 25 : 5);

            // Build up a bunch of dirs that have indexes which we
            // will then merge together by calling addIndexes(*):
            Directory[] dirs           = new Directory[NUM_DIR];
            long        inputDiskUsage = 0;

            for (int i = 0; i < NUM_DIR; i++)
            {
                dirs[i] = NewDirectory();
                IndexWriter writer = new IndexWriter(dirs[i], NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)));
                for (int j = 0; j < 25; j++)
                {
                    AddDocWithIndex(writer, 25 * i + j);
                }
                writer.Dispose();
                string[] files = dirs[i].ListAll();
                for (int j = 0; j < files.Length; j++)
                {
                    inputDiskUsage += dirs[i].FileLength(files[j]);
                }
            }

            // Now, build a starting index that has START_COUNT docs.  We
            // will then try to addIndexes into a copy of this:
            MockDirectoryWrapper startDir  = NewMockDirectory();
            IndexWriter          indWriter = new IndexWriter(startDir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)));

            for (int j = 0; j < START_COUNT; j++)
            {
                AddDocWithIndex(indWriter, j);
            }
            indWriter.Dispose();

            // Make sure starting index seems to be working properly:
            Term        searchTerm = new Term("content", "aaa");
            IndexReader reader     = DirectoryReader.Open(startDir);

            Assert.AreEqual(57, reader.DocFreq(searchTerm), "first docFreq");

            IndexSearcher searcher = NewSearcher(reader);

            ScoreDoc[] hits = searcher.Search(new TermQuery(searchTerm), null, 1000).ScoreDocs;
            Assert.AreEqual(57, hits.Length, "first number of hits");
            reader.Dispose();

            // Iterate with larger and larger amounts of free
            // disk space.  With little free disk space,
            // addIndexes will certainly run out of space &
            // fail.  Verify that when this happens, index is
            // not corrupt and index in fact has added no
            // documents.  Then, we increase disk space by 2000
            // bytes each iteration.  At some point there is
            // enough free disk space and addIndexes should
            // succeed and index should show all documents were
            // added.

            // String[] files = startDir.ListAll();
            long diskUsage = startDir.GetSizeInBytes();

            long startDiskUsage = 0;

            string[] files_ = startDir.ListAll();
            for (int i = 0; i < files_.Length; i++)
            {
                startDiskUsage += startDir.FileLength(files_[i]);
            }

            for (int iter = 0; iter < 3; iter++)
            {
                if (Verbose)
                {
                    Console.WriteLine("TEST: iter=" + iter);
                }

                // Start with 100 bytes more than we are currently using:
                long diskFree = diskUsage + TestUtil.NextInt32(Random, 50, 200);

                int method = iter;

                bool success = false;
                bool done    = false;

                string methodName;
                if (0 == method)
                {
                    methodName = "addIndexes(Directory[]) + forceMerge(1)";
                }
                else if (1 == method)
                {
                    methodName = "addIndexes(IndexReader[])";
                }
                else
                {
                    methodName = "addIndexes(Directory[])";
                }

                while (!done)
                {
                    if (Verbose)
                    {
                        Console.WriteLine("TEST: cycle...");
                    }

                    // Make a new dir that will enforce disk usage:
                    MockDirectoryWrapper dir = new MockDirectoryWrapper(Random, new RAMDirectory(startDir, NewIOContext(Random)));
                    indWriter = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetOpenMode(OpenMode.APPEND).SetMergePolicy(NewLogMergePolicy(false)));
                    IOException err = null;

                    IMergeScheduler ms = indWriter.Config.MergeScheduler;
                    for (int x = 0; x < 2; x++)
                    {
                        if (ms is IConcurrentMergeScheduler)
                        // this test intentionally produces exceptions
                        // in the threads that CMS launches; we don't
                        // want to pollute test output with these.
                        {
                            if (0 == x)
                            {
                                ((IConcurrentMergeScheduler)ms).SetSuppressExceptions();
                            }
                            else
                            {
                                ((IConcurrentMergeScheduler)ms).ClearSuppressExceptions();
                            }
                        }

                        // Two loops: first time, limit disk space &
                        // throw random IOExceptions; second time, no
                        // disk space limit:

                        double rate      = 0.05;
                        double diskRatio = ((double)diskFree) / diskUsage;
                        long   thisDiskFree;

                        string testName = null;

                        if (0 == x)
                        {
                            dir.RandomIOExceptionRateOnOpen = Random.NextDouble() * 0.01;
                            thisDiskFree = diskFree;
                            if (diskRatio >= 2.0)
                            {
                                rate /= 2;
                            }
                            if (diskRatio >= 4.0)
                            {
                                rate /= 2;
                            }
                            if (diskRatio >= 6.0)
                            {
                                rate = 0.0;
                            }
                            if (Verbose)
                            {
                                testName = "disk full test " + methodName + " with disk full at " + diskFree + " bytes";
                            }
                        }
                        else
                        {
                            dir.RandomIOExceptionRateOnOpen = 0.0;
                            thisDiskFree = 0;
                            rate         = 0.0;
                            if (Verbose)
                            {
                                testName = "disk full test " + methodName + " with unlimited disk space";
                            }
                        }

                        if (Verbose)
                        {
                            Console.WriteLine("\ncycle: " + testName);
                        }

                        dir.TrackDiskUsage        = true;
                        dir.MaxSizeInBytes        = thisDiskFree;
                        dir.RandomIOExceptionRate = rate;

                        try
                        {
                            if (0 == method)
                            {
                                if (Verbose)
                                {
                                    Console.WriteLine("TEST: now addIndexes count=" + dirs.Length);
                                }
                                indWriter.AddIndexes(dirs);
                                if (Verbose)
                                {
                                    Console.WriteLine("TEST: now forceMerge");
                                }
                                indWriter.ForceMerge(1);
                            }
                            else if (1 == method)
                            {
                                IndexReader[] readers = new IndexReader[dirs.Length];
                                for (int i = 0; i < dirs.Length; i++)
                                {
                                    readers[i] = DirectoryReader.Open(dirs[i]);
                                }
                                try
                                {
                                    indWriter.AddIndexes(readers);
                                }
                                finally
                                {
                                    for (int i = 0; i < dirs.Length; i++)
                                    {
                                        readers[i].Dispose();
                                    }
                                }
                            }
                            else
                            {
                                indWriter.AddIndexes(dirs);
                            }

                            success = true;
                            if (Verbose)
                            {
                                Console.WriteLine("  success!");
                            }

                            if (0 == x)
                            {
                                done = true;
                            }
                        }
                        catch (IOException e)
                        {
                            success = false;
                            err     = e;
                            if (Verbose)
                            {
                                Console.WriteLine("  hit IOException: " + e);
                                Console.WriteLine(e.StackTrace);
                            }

                            if (1 == x)
                            {
                                Console.WriteLine(e.StackTrace);
                                Assert.Fail(methodName + " hit IOException after disk space was freed up");
                            }
                        }

                        // Make sure all threads from
                        // ConcurrentMergeScheduler are done
                        TestUtil.SyncConcurrentMerges(indWriter);

                        if (Verbose)
                        {
                            Console.WriteLine("  now test readers");
                        }

                        // Finally, verify index is not corrupt, and, if
                        // we succeeded, we see all docs added, and if we
                        // failed, we see either all docs or no docs added
                        // (transactional semantics):
                        dir.RandomIOExceptionRateOnOpen = 0.0;
                        try
                        {
                            reader = DirectoryReader.Open(dir);
                        }
                        catch (IOException e)
                        {
                            Console.WriteLine(e.StackTrace);
                            Assert.Fail(testName + ": exception when creating IndexReader: " + e);
                        }
                        int result = reader.DocFreq(searchTerm);
                        if (success)
                        {
                            if (result != START_COUNT)
                            {
                                Assert.Fail(testName + ": method did not throw exception but docFreq('aaa') is " + result + " instead of expected " + START_COUNT);
                            }
                        }
                        else
                        {
                            // On hitting exception we still may have added
                            // all docs:
                            if (result != START_COUNT && result != END_COUNT)
                            {
                                Console.WriteLine(err.StackTrace);
                                Assert.Fail(testName + ": method did throw exception but docFreq('aaa') is " + result + " instead of expected " + START_COUNT + " or " + END_COUNT);
                            }
                        }

                        searcher = NewSearcher(reader);
                        try
                        {
                            hits = searcher.Search(new TermQuery(searchTerm), null, END_COUNT).ScoreDocs;
                        }
                        catch (IOException e)
                        {
                            Console.WriteLine(e.StackTrace);
                            Assert.Fail(testName + ": exception when searching: " + e);
                        }
                        int result2 = hits.Length;
                        if (success)
                        {
                            if (result2 != result)
                            {
                                Assert.Fail(testName + ": method did not throw exception but hits.Length for search on term 'aaa' is " + result2 + " instead of expected " + result);
                            }
                        }
                        else
                        {
                            // On hitting exception we still may have added
                            // all docs:
                            if (result2 != result)
                            {
                                Console.WriteLine(err.StackTrace);
                                Assert.Fail(testName + ": method did throw exception but hits.Length for search on term 'aaa' is " + result2 + " instead of expected " + result);
                            }
                        }

                        reader.Dispose();
                        if (Verbose)
                        {
                            Console.WriteLine("  count is " + result);
                        }

                        if (done || result == END_COUNT)
                        {
                            break;
                        }
                    }

                    if (Verbose)
                    {
                        Console.WriteLine("  start disk = " + startDiskUsage + "; input disk = " + inputDiskUsage + "; max used = " + dir.MaxUsedSizeInBytes);
                    }

                    if (done)
                    {
                        // Javadocs state that temp free Directory space
                        // required is at most 2X total input size of
                        // indices so let's make sure:
                        Assert.IsTrue((dir.MaxUsedSizeInBytes - startDiskUsage) < 2 * (startDiskUsage + inputDiskUsage), "max free Directory space required exceeded 2X the total input index sizes during " + methodName + ": max temp usage = " + (dir.MaxUsedSizeInBytes - startDiskUsage) + " bytes vs limit=" + (2 * (startDiskUsage + inputDiskUsage)) + "; starting disk usage = " + startDiskUsage + " bytes; " + "input index disk usage = " + inputDiskUsage + " bytes");
                    }

                    // Make sure we don't hit disk full during close below:
                    dir.MaxSizeInBytes              = 0;
                    dir.RandomIOExceptionRate       = 0.0;
                    dir.RandomIOExceptionRateOnOpen = 0.0;

                    indWriter.Dispose();

                    // Wait for all BG threads to finish else
                    // dir.Dispose() will throw IOException because
                    // there are still open files
                    TestUtil.SyncConcurrentMerges(ms);

                    dir.Dispose();

                    // Try again with more free space:
                    diskFree += TestNightly ? TestUtil.NextInt32(Random, 4000, 8000) : TestUtil.NextInt32(Random, 40000, 80000);
                }
            }

            startDir.Dispose();
            foreach (Directory dir in dirs)
            {
                dir.Dispose();
            }
        }
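
The test above drives disk-full and random IO failures through MockDirectoryWrapper's fault-injection properties (MaxSizeInBytes, RandomIOExceptionRate, RandomIOExceptionRateOnOpen). A minimal sketch of that pattern in isolation, assuming the same Lucene.Net.TestFramework types the test uses:

        // Sketch: inject disk-full and random IO failures around an IndexWriter.
        // Assumes Lucene.Net.TestFramework's MockDirectoryWrapper, as used above.
        var dir = new MockDirectoryWrapper(Random, new RAMDirectory());
        dir.MaxSizeInBytes        = 4 * 1024; // writes fail once usage exceeds 4 KB
        dir.RandomIOExceptionRate = 0.05;     // ~5% of IO calls throw IOException
        try
        {
            using (var writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))))
            {
                var doc = new Documents.Document();
                doc.Add(NewStringField("field", "aaa", Field.Store.NO));
                writer.AddDocument(doc);
            }
        }
        catch (IOException)
        {
            // expected under fault injection; the on-disk index must stay consistent
        }
        finally
        {
            dir.MaxSizeInBytes        = 0;   // lift the limit so cleanup can succeed
            dir.RandomIOExceptionRate = 0.0;
            dir.Dispose();
        }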
        public virtual void TestCloseWithThreads([ValueSource(typeof(ConcurrentMergeSchedulerFactories), "Values")] Func <IConcurrentMergeScheduler> newScheduler)
        {
            int NUM_THREADS   = 3;
            int numIterations = TEST_NIGHTLY ? 7 : 3;

            for (int iter = 0; iter < numIterations; iter++)
            {
                if (VERBOSE)
                {
                    Console.WriteLine("\nTEST: iter=" + iter);
                }
                Directory dir    = NewDirectory();
                var       config = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))
                                   .SetMaxBufferedDocs(10)
                                   .SetMergeScheduler(newScheduler())
                                   .SetMergePolicy(NewLogMergePolicy(4));
                IndexWriter writer    = new IndexWriter(dir, config);
                var         scheduler = config.MergeScheduler as IConcurrentMergeScheduler;
                if (scheduler != null)
                {
                    scheduler.SetSuppressExceptions();
                }

                IndexerThread[] threads = new IndexerThread[NUM_THREADS];

                for (int i = 0; i < NUM_THREADS; i++)
                {
                    // LUCENENET NOTE: ConcurrentMergeScheduler used to take too long
                    // for this test to index a single document, so the time was
                    // increased from 200 to 300 ms; it has since been restored to
                    // 200 ms to match Lucene.
                    threads[i] = new IndexerThread(writer, false, NewField)
                    {
                        TimeToRunInMilliseconds = 200
                    };
                }

                for (int i = 0; i < NUM_THREADS; i++)
                {
                    threads[i].Start();
                }

                bool done = false;
                while (!done)
                {
                    Thread.Sleep(100);
                    // only stop when at least one thread has added a doc
                    for (int i = 0; i < NUM_THREADS; i++)
                    {
                        if (threads[i].AddCount > 0)
                        {
                            done = true;
                            break;
                        }
                        else if (!threads[i].IsAlive)
                        {
                            Assert.Fail("thread failed before indexing a single document");
                        }
                    }
                }

                if (VERBOSE)
                {
                    Console.WriteLine("\nTEST: now close");
                }
                writer.Dispose(false);

                // Make sure threads that are adding docs are not hung:
                for (int i = 0; i < NUM_THREADS; i++)
                {
                    // Without fix for LUCENE-1130: one of the
                    // threads will hang
                    threads[i].Join();
                    if (threads[i].IsAlive)
                    {
                        Assert.Fail("thread seems to be hung");
                    }
                }

                // Quick test to make sure index is not corrupt:
                IndexReader reader = DirectoryReader.Open(dir);
                DocsEnum    tdocs  = TestUtil.Docs(Random(), reader, "field", new BytesRef("aaa"), MultiFields.GetLiveDocs(reader), null, 0);
                int         count  = 0;
                while (tdocs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
                {
                    count++;
                }
                Assert.IsTrue(count > 0);
                reader.Dispose();

                dir.Dispose();
            }
        }
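
Note the writer.Dispose(false) call above: the bool overload controls whether the writer waits for in-flight merges. A short sketch of the two modes:

        // Dispose(true) (or plain Dispose()) waits for running merges to finish;
        // Dispose(false) aborts them and returns as soon as possible, which is
        // what this test uses to provoke the LUCENE-1130 hang.
        writer.Dispose(false); // abort merges, close quickly
        // writer.Dispose();   // wait for merges, then close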
Example #56
        public virtual void TestRandom()
        {
            int num = AtLeast(2);

            for (int iter = 0; iter < num; iter++)
            {
                if (VERBOSE)
                {
                    Console.WriteLine("TEST: iter=" + iter);
                }

                Directory dir = NewDirectory();

                IndexWriter w = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NoMergePolicy.COMPOUND_FILES));
                // we can do this because we use NoMergePolicy (and don't merge to "nothing")
                w.KeepFullyDeletedSegments = true;

                IDictionary <BytesRef, IList <int?> > docs = new Dictionary <BytesRef, IList <int?> >();
                HashSet <int?>   deleted = new HashSet <int?>();
                IList <BytesRef> terms   = new List <BytesRef>();

                int numDocs            = TestUtil.NextInt(Random(), 1, 100 * RANDOM_MULTIPLIER);
                Documents.Document doc = new Documents.Document();
                Field f = NewStringField("field", "", Field.Store.NO);
                doc.Add(f);
                Field id = NewStringField("id", "", Field.Store.NO);
                doc.Add(id);

                bool onlyUniqueTerms = Random().NextBoolean();
                if (VERBOSE)
                {
                    Console.WriteLine("TEST: onlyUniqueTerms=" + onlyUniqueTerms + " numDocs=" + numDocs);
                }
                HashSet <BytesRef> uniqueTerms = new HashSet <BytesRef>();
                for (int i = 0; i < numDocs; i++)
                {
                    if (!onlyUniqueTerms && Random().NextBoolean() && terms.Count > 0)
                    {
                        // re-use existing term
                        BytesRef term = terms[Random().Next(terms.Count)];
                        docs[term].Add(i);
                        f.SetStringValue(term.Utf8ToString());
                    }
                    else
                    {
                        string   s    = TestUtil.RandomUnicodeString(Random(), 10);
                        BytesRef term = new BytesRef(s);
                        if (!docs.ContainsKey(term))
                        {
                            docs[term] = new List <int?>();
                        }
                        docs[term].Add(i);
                        terms.Add(term);
                        uniqueTerms.Add(term);
                        f.SetStringValue(s);
                    }
                    id.SetStringValue("" + i);
                    w.AddDocument(doc);
                    if (Random().Next(4) == 1)
                    {
                        w.Commit();
                    }
                    if (i > 0 && Random().Next(20) == 1)
                    {
                        int delID = Random().Next(i);
                        deleted.Add(delID);
                        w.DeleteDocuments(new Term("id", "" + delID));
                        if (VERBOSE)
                        {
                            Console.WriteLine("TEST: delete " + delID);
                        }
                    }
                }

                if (VERBOSE)
                {
                    List <BytesRef> termsList = new List <BytesRef>(uniqueTerms);
#pragma warning disable 612, 618
                    termsList.Sort(BytesRef.UTF8SortedAsUTF16Comparer);
#pragma warning restore 612, 618
                    Console.WriteLine("TEST: terms in UTF16 order:");
                    foreach (BytesRef b in termsList)
                    {
                        Console.WriteLine("  " + UnicodeUtil.ToHexString(b.Utf8ToString()) + " " + b);
                        foreach (int docID in docs[b])
                        {
                            if (deleted.Contains(docID))
                            {
                                Console.WriteLine("    " + docID + " (deleted)");
                            }
                            else
                            {
                                Console.WriteLine("    " + docID);
                            }
                        }
                    }
                }

                IndexReader reader = w.GetReader();
                w.Dispose();
                if (VERBOSE)
                {
                    Console.WriteLine("TEST: reader=" + reader);
                }

                IBits liveDocs = MultiFields.GetLiveDocs(reader);
                foreach (int delDoc in deleted)
                {
                    Assert.IsFalse(liveDocs.Get(delDoc));
                }

                for (int i = 0; i < 100; i++)
                {
                    BytesRef term = terms[Random().Next(terms.Count)];
                    if (VERBOSE)
                    {
                        Console.WriteLine("TEST: seek term=" + UnicodeUtil.ToHexString(term.Utf8ToString()) + " " + term);
                    }

                    DocsEnum docsEnum = TestUtil.Docs(Random(), reader, "field", term, liveDocs, null, DocsFlags.NONE);
                    Assert.IsNotNull(docsEnum);

                    foreach (int docID in docs[term])
                    {
                        if (!deleted.Contains(docID))
                        {
                            Assert.AreEqual(docID, docsEnum.NextDoc());
                        }
                    }
                    Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, docsEnum.NextDoc());
                }

                reader.Dispose();
                dir.Dispose();
            }
        }
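
TestRandom obtains its reader via w.GetReader(), the near-real-time (NRT) pattern: buffered documents and deletes become visible to the returned reader without a Commit() to the directory. A minimal sketch of that flow, under the same test-framework assumptions:

        // Sketch: near-real-time reader, as used in TestRandom above.
        IndexWriter w = new IndexWriter(dir, conf);
        w.AddDocument(doc);              // buffered; nothing committed yet
        IndexReader nrt = w.GetReader(); // flushes and sees the buffered doc
        // ... search against nrt ...
        nrt.Dispose();
        w.Dispose();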
Example #57
        public virtual void Test2BTerms_Mem()
        {
            if ("Lucene3x".Equals(Codec.Default.Name))
            {
                throw new Exception("this test cannot run with PreFlex codec");
            }
            Console.WriteLine("Starting Test2B");
            long TERM_COUNT = ((long)int.MaxValue) + 100000000;

            int TERMS_PER_DOC = TestUtil.NextInt(Random(), 100000, 1000000);

            IList <BytesRef> savedTerms = null;

            BaseDirectoryWrapper dir = NewFSDirectory(CreateTempDir("2BTerms"));

            //MockDirectoryWrapper dir = NewFSDirectory(new File("/p/lucene/indices/2bindex"));
            if (dir is MockDirectoryWrapper)
            {
                ((MockDirectoryWrapper)dir).Throttling = MockDirectoryWrapper.Throttling_e.NEVER;
            }
            dir.CheckIndexOnClose = false; // don't run CheckIndex twice; we do it explicitly below

            if (true)
            {
                IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))
                                                .SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH).SetRAMBufferSizeMB(256.0).SetMergeScheduler(new ConcurrentMergeScheduler()).SetMergePolicy(NewLogMergePolicy(false, 10)).SetOpenMode(IndexWriterConfig.OpenMode_e.CREATE));

                MergePolicy mp = w.Config.MergePolicy;
                if (mp is LogByteSizeMergePolicy)
                {
                    // 1 petabyte:
                    ((LogByteSizeMergePolicy)mp).MaxMergeMB = 1024 * 1024 * 1024;
                }

                Documents.Document doc = new Documents.Document();
                MyTokenStream      ts  = new MyTokenStream(Random(), TERMS_PER_DOC);

                FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
                customType.IndexOptions = FieldInfo.IndexOptions.DOCS_ONLY;
                customType.OmitNorms    = true;
                Field field = new Field("field", ts, customType);
                doc.Add(field);
                //w.setInfoStream(System.out);
                int numDocs = (int)(TERM_COUNT / TERMS_PER_DOC);

                Console.WriteLine("TERMS_PER_DOC=" + TERMS_PER_DOC);
                Console.WriteLine("numDocs=" + numDocs);

                for (int i = 0; i < numDocs; i++)
                {
                    long t0 = Environment.TickCount;
                    w.AddDocument(doc);
                    Console.WriteLine(i + " of " + numDocs + " " + (Environment.TickCount - t0) + " msec");
                }
                savedTerms = ts.SavedTerms;

                Console.WriteLine("TEST: full merge");
                w.ForceMerge(1);
                Console.WriteLine("TEST: close writer");
                w.Dispose();
            }

            Console.WriteLine("TEST: open reader");
            IndexReader r = DirectoryReader.Open(dir);

            if (savedTerms == null)
            {
                savedTerms = FindTerms(r);
            }
            int numSavedTerms            = savedTerms.Count;
            IList <BytesRef> bigOrdTerms = new List <BytesRef>(savedTerms.SubList(numSavedTerms - 10, numSavedTerms));

            Console.WriteLine("TEST: test big ord terms...");
            TestSavedTerms(r, bigOrdTerms);
            Console.WriteLine("TEST: test all saved terms...");
            TestSavedTerms(r, savedTerms);
            r.Dispose();

            Console.WriteLine("TEST: now CheckIndex...");
            CheckIndex.Status status = TestUtil.CheckIndex(dir);
            long tc = status.SegmentInfos[0].TermIndexStatus.TermCount;

            Assert.IsTrue(tc > int.MaxValue, "count " + tc + " is not > " + int.MaxValue);

            dir.Dispose();
            Console.WriteLine("TEST: done!");
        }
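
The per-document timing above uses Environment.TickCount, which has roughly 15 ms resolution and wraps after about 25 days; for a long-running 2B-terms test, System.Diagnostics.Stopwatch is the safer idiom. A sketch of the same loop:

        // Sketch: the same timing loop with Stopwatch instead of TickCount.
        var sw = new System.Diagnostics.Stopwatch();
        for (int i = 0; i < numDocs; i++)
        {
            sw.Restart();
            w.AddDocument(doc);
            Console.WriteLine(i + " of " + numDocs + " " + sw.ElapsedMilliseconds + " msec");
        }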
Example #58
 /// <summary>
 /// Helper function for CreateIndex(): flushes the buffer and closes the index
 /// </summary>
 private void CleanUpIndex()
 {
     writer.Optimize();
     writer.Flush(true, true, true);
     writer.Dispose();
 }
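
Optimize() and the three-argument Flush() are the Lucene.NET 3.x surface; in 4.x, Optimize() was replaced by ForceMerge(int), and Commit() is the public way to flush durably. A hedged sketch of the equivalent 4.x cleanup:

        // Sketch: a 4.x equivalent of the 3.x cleanup above (assumes Lucene.NET 4.x).
        writer.ForceMerge(1); // merge down to one segment (was Optimize())
        writer.Commit();      // durably flush buffered docs and deletes
        writer.Dispose();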
Example #59
 /// <summary>
 /// Flushes the buffer and closes the index
 /// </summary>
 public void CleanUpIndexer()
 {
     writer.Optimize();
     writer.Flush(true, true, true);
     writer.Dispose();
 }
Example #60
        public virtual void TestKeepLastNDeletionPolicyWithCreates()
        {
            const int N = 10;

            for (int pass = 0; pass < 2; pass++)
            {
                bool useCompoundFile = (pass % 2) != 0;

                Directory         dir  = NewDirectory();
                IndexWriterConfig conf = (IndexWriterConfig)NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetOpenMode(OpenMode.CREATE).SetIndexDeletionPolicy(new KeepLastNDeletionPolicy(this, N)).SetMaxBufferedDocs(10);
                MergePolicy       mp   = conf.MergePolicy;
                mp.NoCFSRatio = useCompoundFile ? 1.0 : 0.0;
                IndexWriter             writer = new IndexWriter(dir, conf);
                KeepLastNDeletionPolicy policy = (KeepLastNDeletionPolicy)writer.Config.IndexDeletionPolicy;
                writer.Dispose();
                Term  searchTerm = new Term("content", "aaa");
                Query query      = new TermQuery(searchTerm);

                for (int i = 0; i < N + 1; i++)
                {
                    conf          = (IndexWriterConfig)NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetOpenMode(OpenMode.APPEND).SetIndexDeletionPolicy(policy).SetMaxBufferedDocs(10);
                    mp            = conf.MergePolicy;
                    mp.NoCFSRatio = useCompoundFile ? 1.0 : 0.0;
                    writer        = new IndexWriter(dir, conf);
                    policy        = (KeepLastNDeletionPolicy)writer.Config.IndexDeletionPolicy;
                    for (int j = 0; j < 17; j++)
                    {
                        AddDocWithID(writer, i * (N + 1) + j);
                    }
                    // this is a commit
                    writer.Dispose();
                    conf   = (new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))).SetIndexDeletionPolicy(policy).SetMergePolicy(NoMergePolicy.COMPOUND_FILES);
                    writer = new IndexWriter(dir, conf);
                    policy = (KeepLastNDeletionPolicy)writer.Config.IndexDeletionPolicy;
                    writer.DeleteDocuments(new Term("id", "" + (i * (N + 1) + 3)));
                    // this is a commit
                    writer.Dispose();
                    IndexReader   reader   = DirectoryReader.Open(dir);
                    IndexSearcher searcher = NewSearcher(reader);
                    ScoreDoc[]    hits     = searcher.Search(query, null, 1000).ScoreDocs;
                    Assert.AreEqual(16, hits.Length);
                    reader.Dispose();

                    writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetOpenMode(OpenMode.CREATE).SetIndexDeletionPolicy(policy));
                    policy = (KeepLastNDeletionPolicy)writer.Config.IndexDeletionPolicy;
                    // this will not commit: there are no changes
                    // pending because we opened for "create":
                    writer.Dispose();
                }

                Assert.AreEqual(3 * (N + 1) + 1, policy.numOnInit);
                Assert.AreEqual(3 * (N + 1) + 1, policy.numOnCommit);

                IndexReader   rwReader  = DirectoryReader.Open(dir);
                IndexSearcher searcher_ = NewSearcher(rwReader);
                ScoreDoc[]    hits_     = searcher_.Search(query, null, 1000).ScoreDocs;
                Assert.AreEqual(0, hits_.Length);

                // Simplistic check: just verify only the past N segments_N's still
                // exist, and, I can open a reader on each:
                long gen = SegmentInfos.GetLastCommitGeneration(dir);

                dir.DeleteFile(IndexFileNames.SEGMENTS_GEN);
                int expectedCount = 0;

                rwReader.Dispose();

                for (int i = 0; i < N + 1; i++)
                {
                    try
                    {
                        IndexReader reader = DirectoryReader.Open(dir);

                        // Work backwards in commits on what the expected
                        // count should be.
                        searcher_ = NewSearcher(reader);
                        hits_     = searcher_.Search(query, null, 1000).ScoreDocs;
                        Assert.AreEqual(expectedCount, hits_.Length);
                        if (expectedCount == 0)
                        {
                            expectedCount = 16;
                        }
                        else if (expectedCount == 16)
                        {
                            expectedCount = 17;
                        }
                        else if (expectedCount == 17)
                        {
                            expectedCount = 0;
                        }
                        reader.Dispose();
                        if (i == N)
                        {
                            Assert.Fail("should have failed on commits before last " + N);
                        }
                    }
                    catch (Exception e) when (e.IsIOException())
                    {
                        if (i != N)
                        {
                            throw; // LUCENENET: CA2200: Rethrow to preserve stack details (https://docs.microsoft.com/en-us/visualstudio/code-quality/ca2200-rethrow-to-preserve-stack-details)
                        }
                    }
                    if (i < N)
                    {
                        dir.DeleteFile(IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen));
                    }
                    gen--;
                }

                dir.Dispose();
            }
        }
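
KeepLastNDeletionPolicy above is a test-local class; Lucene.NET's IndexDeletionPolicy contract passes commits oldest-first to OnInit (at writer open) and OnCommit (after each commit). A minimal illustrative sketch of such a policy (KeepLastNSketch and its counters are hypothetical, not the test's actual implementation; assumes the Lucene.NET 4.8 API):

        // Illustrative keep-last-N deletion policy; assumes Lucene.NET 4.8's
        // IndexDeletionPolicy contract (commits ordered oldest to newest).
        public class KeepLastNSketch : IndexDeletionPolicy
        {
            private readonly int n;
            public int numOnInit, numOnCommit; // counters like those asserted above

            public KeepLastNSketch(int n)
            {
                this.n = n;
            }

            public override void OnInit<T>(IList<T> commits)
            {
                numOnInit++;
                DeleteOldest(commits);
            }

            public override void OnCommit<T>(IList<T> commits)
            {
                numOnCommit++;
                DeleteOldest(commits);
            }

            private void DeleteOldest<T>(IList<T> commits) where T : IndexCommit
            {
                // keep the newest n commits; flag everything older for deletion
                for (int i = 0; i < commits.Count - n; i++)
                {
                    commits[i].Delete();
                }
            }
        }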