Ejemplo n.º 1
0
        public static void BeforeClassCountingFacetsAggregatorTest()
        {
            indexDir = NewDirectory();
            taxoDir = NewDirectory();

            // create an index which has:
            // 1. Segment with no categories, but matching results
            // 2. Segment w/ categories, but no results
            // 3. Segment w/ categories and results
            // 4. Segment w/ categories, but only some results

            IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
            //conf.MergePolicy = NoMergePolicy.INSTANCE; // prevent merges, so we can control the index segments
            IndexWriter indexWriter = new IndexWriter(indexDir, conf);
            TaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);

            allExpectedCounts = newCounts();
            termExpectedCounts = newCounts();

            // segment w/ no categories
            IndexDocsNoFacets(indexWriter);

            // segment w/ categories, no content
            IndexDocsWithFacetsNoTerms(indexWriter, taxoWriter, allExpectedCounts);

            // segment w/ categories and content
            IndexDocsWithFacetsAndTerms(indexWriter, taxoWriter, allExpectedCounts);

            // segment w/ categories and some content
            IndexDocsWithFacetsAndSomeTerms(indexWriter, taxoWriter, allExpectedCounts);

            IOUtils.Close(indexWriter, taxoWriter);
        }
Ejemplo n.º 2
0
        private void CreateIndex(EditViewArticle article)
        {
            //using (var analyzer = new Lucene.Net.Analysis.Standard.StandardAnalyzer(Lucene.Net.Util.LuceneVersion.LUCENE_48))
            //{
            var options = new Lucene.Net.Index.IndexWriterConfig(Lucene.Net.Util.LuceneVersion.LUCENE_48, null)
            {
                OpenMode = Lucene.Net.Index.OpenMode.CREATE
            };

            //using (var indexWriter = new Lucene.Net.Index.IndexWriter(_directory, analyzer, Lucene.Net.Index.IndexWriter.MaxFieldLength.UNLIMITED))
            using (var indexWriter = new Lucene.Net.Index.IndexWriter(_directory, options))
            {
                var document = new Lucene.Net.Documents.Document
                {
                    new Lucene.Net.Documents.TextField("Id", article.Id.ToString(), Lucene.Net.Documents.Field.Store.YES),
                    new Lucene.Net.Documents.TextField("Title", article.Title, Lucene.Net.Documents.Field.Store.YES),
                    // HTML文本
                    // old版本//document.Add(new Lucene.Net.Documents.Field("Contents", article.Contents, Lucene.Net.Documents.Field.Store.YES, Lucene.Net.Documents.Field.Index.ANALYZED));
                    //document.Add(new Lucene.Net.Documents.TextField("Contents", article.Contents, Lucene.Net.Documents.Field.Store.YES));
                    // 纯文本
                    new Lucene.Net.Documents.TextField("TContents", article.Summary, Lucene.Net.Documents.Field.Store.YES),
                    new Lucene.Net.Documents.TextField("CreateTime", article.CreateTime.ToString(), Lucene.Net.Documents.Field.Store.YES)
                };

                indexWriter.AddDocument(document, this.CreateAnalyzer());
                indexWriter.Commit();
            }
            //}
        }
 public override void SetUp()
 {
     base.SetUp();
     Dir = NewFSDirectory(CreateTempDir("testDFBlockSize"));
     Iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
     Iwc.SetCodec(TestUtil.AlwaysPostingsFormat(new Lucene41PostingsFormat()));
     Iw = new RandomIndexWriter(Random(), Dir, (IndexWriterConfig)Iwc.Clone());
     Iw.RandomForceMerge = false; // we will ourselves
 }
 public void TestCustomMergeScheduler()
 {
     // we don't really need to execute anything, just to make sure the custom MS
     // compiles. But ensure that it can be used as well, e.g., no other hidden
     // dependencies or something. Therefore, don't use any random API !
     Directory dir = new RAMDirectory();
     IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, null);
     conf.SetMergeScheduler(new ReportingMergeScheduler());
     IndexWriter writer = new IndexWriter(dir, conf);
     writer.AddDocument(new Document());
     writer.Commit(); // trigger flush
     writer.AddDocument(new Document());
     writer.Commit(); // trigger flush
     writer.ForceMerge(1);
     writer.Dispose();
     dir.Dispose();
 }
Ejemplo n.º 5
0
        } // End Function GetWrappedAnalyzer

        private static void BuildIndex(string indexPath, System.Collections.Generic.IEnumerable <string> dataToIndex)
        {
            Lucene.Net.Util.LuceneVersion version = Lucene.Net.Util.LuceneVersion.LUCENE_48;

            Lucene.Net.Store.Directory luceneIndexDirectory = Lucene.Net.Store.FSDirectory.Open(indexPath);


            // Lucene.Net.Analysis.Analyzer analyzer = new Lucene.Net.Analysis.Core.WhitespaceAnalyzer(version);
            // Lucene.Net.Analysis.Analyzer analyzer = new Lucene.Net.Analysis.Standard.StandardAnalyzer(version);
            // Lucene.Net.Analysis.Analyzer analyzer = new Lucene.Net.Analysis.Core.KeywordAnalyzer();
            Lucene.Net.Analysis.Analyzer analyzer = GetWrappedAnalyzer();

            Lucene.Net.Index.IndexWriterConfig writerConfig = new Lucene.Net.Index.IndexWriterConfig(version, analyzer);
            writerConfig.OpenMode = Lucene.Net.Index.OpenMode.CREATE; // Overwrite, if exists

            using (Lucene.Net.Index.IndexWriter writer = new Lucene.Net.Index.IndexWriter(luceneIndexDirectory, writerConfig))
            {
                foreach (string thisValue in dataToIndex)
                {
                    Lucene.Net.Documents.Document doc = new Lucene.Net.Documents.Document();

                    string directory_name        = System.IO.Path.GetDirectoryName(thisValue);
                    string file_name             = System.IO.Path.GetFileName(thisValue);
                    string filename_no_extension = System.IO.Path.GetFileNameWithoutExtension(thisValue);
                    string extension             = System.IO.Path.GetExtension(thisValue);


                    // StringField indexes but doesn't tokenize
                    doc.Add(new Lucene.Net.Documents.StringField("full_name", thisValue, Lucene.Net.Documents.Field.Store.YES));
                    doc.Add(new Lucene.Net.Documents.StringField("directory_name", directory_name, Lucene.Net.Documents.Field.Store.YES));
                    doc.Add(new Lucene.Net.Documents.StringField("file_name", file_name, Lucene.Net.Documents.Field.Store.YES));
                    doc.Add(new Lucene.Net.Documents.StringField("filename_no_extension", filename_no_extension, Lucene.Net.Documents.Field.Store.YES));
                    doc.Add(new Lucene.Net.Documents.StringField("extension", extension, Lucene.Net.Documents.Field.Store.YES));
                    // doc.Add( new Lucene.Net.Documents.TextField("favoritePhrase", thisValue, Lucene.Net.Documents.Field.Store.YES) );


                    writer.AddDocument(doc);
                } // Next thisValue

                // writer.Optimize();
                writer.Flush(true, true);
            } // Dispose needs to be called, otherwise the index cannot be read ...
        }     // End Sub BuildIndex
Ejemplo n.º 6
0
            public NodeState(ShardSearchingTestBase outerInstance, Random random, int nodeID, int numNodes)
            {
                this.OuterInstance = outerInstance;
                MyNodeID = nodeID;
                Dir = NewFSDirectory(CreateTempDir("ShardSearchingTestBase"));
                // TODO: set warmer
                MockAnalyzer analyzer = new MockAnalyzer(Random());
                analyzer.MaxTokenLength = TestUtil.NextInt(Random(), 1, IndexWriter.MAX_TERM_LENGTH);
                IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
                iwc.SetOpenMode(IndexWriterConfig.OpenMode_e.CREATE);
                if (VERBOSE)
                {
                    iwc.InfoStream = new PrintStreamInfoStream(Console.Out);
                }
                Writer = new IndexWriter(Dir, iwc);
                Mgr = new SearcherManager(Writer, true, null);
                Searchers = new SearcherLifetimeManager();

                // Init w/ 0s... caller above will do initial
                // "broadcast" by calling initSearcher:
                CurrentNodeVersions = new long[numNodes];
            }
 protected override IndexWriter OpenIndexWriter(Directory directory, IndexWriterConfig config)
 {
     return iw;
 }
 protected override IndexWriterConfig CreateIndexWriterConfig(IndexWriterConfig.OpenMode_e openMode)
 {
     IndexWriterConfig conf = base.CreateIndexWriterConfig(openMode);
     LogMergePolicy lmp = (LogMergePolicy)conf.MergePolicy;
     lmp.MergeFactor = 2;
     return conf;
 }
Ejemplo n.º 9
0
        public virtual void TestBooleanSpanQuery()
        {
            bool failed = false;
            int hits = 0;
            Directory directory = NewDirectory();
            Analyzer indexerAnalyzer = new MockAnalyzer(Random());

            IndexWriterConfig config = new IndexWriterConfig(TEST_VERSION_CURRENT, indexerAnalyzer);
            IndexWriter writer = new IndexWriter(directory, config);
            string FIELD = "content";
            Document d = new Document();
            d.Add(new TextField(FIELD, "clockwork orange", Field.Store.YES));
            writer.AddDocument(d);
            writer.Dispose();

            IndexReader indexReader = DirectoryReader.Open(directory);
            IndexSearcher searcher = NewSearcher(indexReader);

            BooleanQuery query = new BooleanQuery();
            SpanQuery sq1 = new SpanTermQuery(new Term(FIELD, "clockwork"));
            SpanQuery sq2 = new SpanTermQuery(new Term(FIELD, "clckwork"));
            query.Add(sq1, BooleanClause.Occur.SHOULD);
            query.Add(sq2, BooleanClause.Occur.SHOULD);
            TopScoreDocCollector collector = TopScoreDocCollector.Create(1000, true);
            searcher.Search(query, collector);
            hits = collector.TopDocs().ScoreDocs.Length;
            foreach (ScoreDoc scoreDoc in collector.TopDocs().ScoreDocs)
            {
                Console.WriteLine(scoreDoc.Doc);
            }
            indexReader.Dispose();
            Assert.AreEqual(failed, false, "Bug in boolean query composed of span queries");
            Assert.AreEqual(hits, 1, "Bug in boolean query composed of span queries");
            directory.Dispose();
        }
 public LatchedIndexWriter(Directory d, IndexWriterConfig conf, CountDownLatch latch, CountDownLatch signal)
     : base(d, conf)
 {
     this.Latch = latch;
     this.Signal = signal;
 }
        // LUCENE-5461
        public virtual void TestCRTReopen()
        {
            //test behaving badly

            //should be high enough
            int maxStaleSecs = 20;

            //build crap data just to store it.
            string s = "        abcdefghijklmnopqrstuvwxyz     ";
            char[] chars = s.ToCharArray();
            StringBuilder builder = new StringBuilder(2048);
            for (int i = 0; i < 2048; i++)
            {
                builder.Append(chars[Random().Next(chars.Length)]);
            }
            string content = builder.ToString();

            SnapshotDeletionPolicy sdp = new SnapshotDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy());
            Directory dir = new NRTCachingDirectory(NewFSDirectory(CreateTempDir("nrt")), 5, 128);
            IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_46, new MockAnalyzer(Random()));
            config.SetIndexDeletionPolicy(sdp);
            config.SetOpenMode(IndexWriterConfig.OpenMode_e.CREATE_OR_APPEND);
            IndexWriter iw = new IndexWriter(dir, config);
            SearcherManager sm = new SearcherManager(iw, true, new SearcherFactory());
            TrackingIndexWriter tiw = new TrackingIndexWriter(iw);
            ControlledRealTimeReopenThread<IndexSearcher> controlledRealTimeReopenThread = new ControlledRealTimeReopenThread<IndexSearcher>(tiw, sm, maxStaleSecs, 0);

            controlledRealTimeReopenThread.SetDaemon(true);
            controlledRealTimeReopenThread.Start();

            IList<Thread> commitThreads = new List<Thread>();

            for (int i = 0; i < 500; i++)
            {
                if (i > 0 && i % 50 == 0)
                {
                    Thread commitThread = new Thread(new RunnableAnonymousInnerClassHelper(this, sdp, dir, iw));
                    commitThread.Start();
                    commitThreads.Add(commitThread);
                }
                Document d = new Document();
                d.Add(new TextField("count", i + "", Field.Store.NO));
                d.Add(new TextField("content", content, Field.Store.YES));
                long start = DateTime.Now.Millisecond;
                long l = tiw.AddDocument(d);
                controlledRealTimeReopenThread.WaitForGeneration(l);
                long wait = DateTime.Now.Millisecond - start;
                Assert.IsTrue(wait < (maxStaleSecs * 1000), "waited too long for generation " + wait);
                IndexSearcher searcher = sm.Acquire();
                TopDocs td = searcher.Search(new TermQuery(new Term("count", i + "")), 10);
                sm.Release(searcher);
                Assert.AreEqual(1, td.TotalHits);
            }

            foreach (Thread commitThread in commitThreads)
            {
                commitThread.Join();
            }

            controlledRealTimeReopenThread.Dispose();
            sm.Dispose();
            iw.Dispose();
            dir.Dispose();
        }
Ejemplo n.º 12
0
 private static void Populate(Directory directory, IndexWriterConfig config)
 {
     RandomIndexWriter writer = new RandomIndexWriter(Random(), directory, config);
     for (int i = 0; i < NUMBER_OF_DOCUMENTS; i++)
     {
         Document document = new Document();
         for (int f = 0; f < NUMBER_OF_FIELDS; f++)
         {
             document.Add(NewStringField("field" + f, Text, Field.Store.NO));
         }
         writer.AddDocument(document);
     }
     writer.ForceMerge(1);
     writer.Dispose();
 }
        private IndexWriter NewWriter(Directory dir, IndexWriterConfig conf)
        {
            LogDocMergePolicy logByteSizeMergePolicy = new LogDocMergePolicy();
            logByteSizeMergePolicy.NoCFSRatio = 0.0; // make sure we use plain
            // files
            conf.SetMergePolicy(logByteSizeMergePolicy);

            IndexWriter writer = new IndexWriter(dir, conf);
            return writer;
        }
Ejemplo n.º 14
0
        public override void Dispose()
        {
            lock (this)
            {
                // files that we tried to delete, but couldn't because readers were open.
                // all that matters is that we tried! (they will eventually go away)
                ISet<string> pendingDeletions = new HashSet<string>(OpenFilesDeleted);
                MaybeYield();
                if (OpenFiles == null)
                {
                    OpenFiles = new Dictionary<string, int>();
                    OpenFilesDeleted = new HashSet<string>();
                }
                if (OpenFiles.Count > 0)
                {
                    // print the first one as its very verbose otherwise
                    Exception cause = null;
                    IEnumerator<Exception> stacktraces = OpenFileHandles.Values.GetEnumerator();
                    if (stacktraces.MoveNext())
                    {
                        cause = stacktraces.Current;
                    }

                    // RuntimeException instead ofSystem.IO.IOException because
                    // super() does not throwSystem.IO.IOException currently:
                    throw new Exception("MockDirectoryWrapper: cannot close: there are still open files: "
                        + String.Join(" ,", OpenFiles.ToArray().Select(x => x.Key)), cause);
                }
                if (OpenLocks.Count > 0)
                {
                    throw new Exception("MockDirectoryWrapper: cannot close: there are still open locks: "
                        + String.Join(" ,", OpenLocks.ToArray()));
                }

                IsOpen = false;
                if (CheckIndexOnClose)
                {
                    RandomIOExceptionRate_Renamed = 0.0;
                    RandomIOExceptionRateOnOpen_Renamed = 0.0;
                    if (DirectoryReader.IndexExists(this))
                    {
                        if (LuceneTestCase.VERBOSE)
                        {
                            Console.WriteLine("\nNOTE: MockDirectoryWrapper: now crush");
                        }
                        Crash(); // corrupt any unsynced-files
                        if (LuceneTestCase.VERBOSE)
                        {
                            Console.WriteLine("\nNOTE: MockDirectoryWrapper: now run CheckIndex");
                        }
                        TestUtil.CheckIndex(this, CrossCheckTermVectorsOnClose);

                        // TODO: factor this out / share w/ TestIW.assertNoUnreferencedFiles
                        if (AssertNoUnreferencedFilesOnClose)
                        {
                            // now look for unreferenced files: discount ones that we tried to delete but could not
                            HashSet<string> allFiles = new HashSet<string>(Arrays.AsList(ListAll()));
                            allFiles.RemoveAll(pendingDeletions);
                            string[] startFiles = allFiles.ToArray(/*new string[0]*/);
                            IndexWriterConfig iwc = new IndexWriterConfig(LuceneTestCase.TEST_VERSION_CURRENT, null);
                            iwc.SetIndexDeletionPolicy(NoDeletionPolicy.INSTANCE);
                            (new IndexWriter(@in, iwc)).Rollback();
                            string[] endFiles = @in.ListAll();

                            ISet<string> startSet = new SortedSet<string>(Arrays.AsList(startFiles));
                            ISet<string> endSet = new SortedSet<string>(Arrays.AsList(endFiles));

                            if (pendingDeletions.Contains("segments.gen") && endSet.Contains("segments.gen"))
                            {
                                // this is possible if we hit an exception while writing segments.gen, we try to delete it
                                // and it ends out in pendingDeletions (but IFD wont remove this).
                                startSet.Add("segments.gen");
                                if (LuceneTestCase.VERBOSE)
                                {
                                    Console.WriteLine("MDW: Unreferenced check: Ignoring segments.gen that we could not delete.");
                                }
                            }

                            // its possible we cannot delete the segments_N on windows if someone has it open and
                            // maybe other files too, depending on timing. normally someone on windows wouldnt have
                            // an issue (IFD would nuke this stuff eventually), but we pass NoDeletionPolicy...
                            foreach (string file in pendingDeletions)
                            {
                                if (file.StartsWith("segments") && !file.Equals("segments.gen") && endSet.Contains(file))
                                {
                                    startSet.Add(file);
                                    if (LuceneTestCase.VERBOSE)
                                    {
                                        Console.WriteLine("MDW: Unreferenced check: Ignoring segments file: " + file + " that we could not delete.");
                                    }
                                    SegmentInfos sis = new SegmentInfos();
                                    try
                                    {
                                        sis.Read(@in, file);
                                    }
                                    catch (System.IO.IOException ioe)
                                    {
                                        // OK: likely some of the .si files were deleted
                                    }

                                    try
                                    {
                                        ISet<string> ghosts = new HashSet<string>(sis.Files(@in, false));
                                        foreach (string s in ghosts)
                                        {
                                            if (endSet.Contains(s) && !startSet.Contains(s))
                                            {
                                                Debug.Assert(pendingDeletions.Contains(s));
                                                if (LuceneTestCase.VERBOSE)
                                                {
                                                    Console.WriteLine("MDW: Unreferenced check: Ignoring referenced file: " + s + " " + "from " + file + " that we could not delete.");
                                                }
                                                startSet.Add(s);
                                            }
                                        }
                                    }
                                    catch (Exception t)
                                    {
                                        Console.Error.WriteLine("ERROR processing leftover segments file " + file + ":");
                                        Console.WriteLine(t.ToString());
                                        Console.Write(t.StackTrace);
                                    }
                                }
                            }

                            startFiles = startSet.ToArray(/*new string[0]*/);
                            endFiles = endSet.ToArray(/*new string[0]*/);

                            if (!Arrays.Equals(startFiles, endFiles))
                            {
                                IList<string> removed = new List<string>();
                                foreach (string fileName in startFiles)
                                {
                                    if (!endSet.Contains(fileName))
                                    {
                                        removed.Add(fileName);
                                    }
                                }

                                IList<string> added = new List<string>();
                                foreach (string fileName in endFiles)
                                {
                                    if (!startSet.Contains(fileName))
                                    {
                                        added.Add(fileName);
                                    }
                                }

                                string extras;
                                if (removed.Count != 0)
                                {
                                    extras = "\n\nThese files were removed: " + removed;
                                }
                                else
                                {
                                    extras = "";
                                }

                                if (added.Count != 0)
                                {
                                    extras += "\n\nThese files were added (waaaaaaaaaat!): " + added;
                                }

                                if (pendingDeletions.Count != 0)
                                {
                                    extras += "\n\nThese files we had previously tried to delete, but couldn't: " + pendingDeletions;
                                }

                                Debug.Assert(false, "unreferenced files: before delete:\n    " + Arrays.ToString(startFiles) + "\n  after delete:\n    " + Arrays.ToString(endFiles) + extras);
                            }

                            DirectoryReader ir1 = DirectoryReader.Open(this);
                            int numDocs1 = ir1.NumDocs;
                            ir1.Dispose();
                            (new IndexWriter(this, new IndexWriterConfig(LuceneTestCase.TEST_VERSION_CURRENT, null))).Dispose();
                            DirectoryReader ir2 = DirectoryReader.Open(this);
                            int numDocs2 = ir2.NumDocs;
                            ir2.Dispose();
                            Debug.Assert(numDocs1 == numDocs2, "numDocs changed after opening/closing IW: before=" + numDocs1 + " after=" + numDocs2);
                        }
                    }
                }
                @in.Dispose();
            }
        }
Ejemplo n.º 15
0
        // NOTE: not a test; just here to make sure the code frag
        // in the javadocs is correct!
        public virtual void VerifyCompiles()
        {
            Analyzer analyzer = null;

            Directory fsDir = FSDirectory.Open(new DirectoryInfo("/path/to/index"));
            NRTCachingDirectory cachedFSDir = new NRTCachingDirectory(fsDir, 2.0, 25.0);
            IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
            IndexWriter writer = new IndexWriter(cachedFSDir, conf);
        }