Example No. 1
        public virtual void TestMmapIndex()
        {
            // Sometimes the directory is not cleaned by RmDir because, on Windows, it
            // may take some time until the files are finally dereferenced. So clean the
            // directory up front; otherwise the new IndexWriter will fail.
            DirectoryInfo dirPath = CreateTempDir("testLuceneMmap");
            RmDir(dirPath);
            MMapDirectory dir = new MMapDirectory(dirPath, null);

            // plan to add a set of useful stopwords, consider changing some of the
            // interior filters.
            MockAnalyzer analyzer = new MockAnalyzer(Random());
            // TODO: something about lock timeouts and leftover locks.
            IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetOpenMode(IndexWriterConfig.OpenMode_e.CREATE));
            writer.Commit();
            IndexReader reader = DirectoryReader.Open(dir);
            IndexSearcher searcher = NewSearcher(reader);

            int num = AtLeast(1000);
            for (int dx = 0; dx < num; dx++)
            {
                string f = RandomField();
                Document doc = new Document();
                doc.Add(NewTextField("data", f, Field.Store.YES));
                writer.AddDocument(doc);
            }

            reader.Dispose();
            writer.Dispose();
            RmDir(dirPath);
        }
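The RmDir and RandomField helpers are not shown in this excerpt. A minimal sketch of what they might look like, inferred purely from their usage above (both names and bodies are assumptions):

        // Hypothetical helper: best-effort recursive delete. On Windows the files may
        // still be memory-mapped, which is exactly why TestMmapIndex also cleans the
        // directory up front before opening a new IndexWriter.
        private static void RmDir(DirectoryInfo dirPath)
        {
            if (dirPath.Exists)
            {
                dirPath.Delete(true);
            }
        }

        // Hypothetical helper: any random text works; the test only exercises
        // indexing and searching through MMapDirectory.
        private string RandomField()
        {
            return TestUtil.RandomUnicodeString(Random());
        }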
        protected internal virtual void RunFlushByRam(int numThreads, double maxRamMB, bool ensureNotStalled)
        {
            int numDocumentsToIndex = 10 + AtLeast(30);
            AtomicInteger numDocs = new AtomicInteger(numDocumentsToIndex);
            Directory dir = NewDirectory();
            MockDefaultFlushPolicy flushPolicy = new MockDefaultFlushPolicy();
            MockAnalyzer analyzer = new MockAnalyzer(Random());
            analyzer.MaxTokenLength = TestUtil.NextInt(Random(), 1, IndexWriter.MAX_TERM_LENGTH);

            IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetFlushPolicy(flushPolicy);
            int numDWPT = 1 + AtLeast(2);
            DocumentsWriterPerThreadPool threadPool = new ThreadAffinityDocumentsWriterThreadPool(numDWPT);
            iwc.SetIndexerThreadPool(threadPool);
            iwc.SetRAMBufferSizeMB(maxRamMB);
            iwc.SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);
            iwc.SetMaxBufferedDeleteTerms(IndexWriterConfig.DISABLE_AUTO_FLUSH);
            IndexWriter writer = new IndexWriter(dir, iwc);
            flushPolicy = (MockDefaultFlushPolicy)writer.Config.FlushPolicy;
            Assert.IsFalse(flushPolicy.FlushOnDocCount());
            Assert.IsFalse(flushPolicy.FlushOnDeleteTerms());
            Assert.IsTrue(flushPolicy.FlushOnRAM());
            DocumentsWriter docsWriter = writer.DocsWriter;
            Assert.IsNotNull(docsWriter);
            DocumentsWriterFlushControl flushControl = docsWriter.FlushControl;
            Assert.AreEqual(0, flushControl.FlushBytes(), "flush bytes must be 0 after init");

            IndexThread[] threads = new IndexThread[numThreads];
            for (int x = 0; x < threads.Length; x++)
            {
                threads[x] = new IndexThread(this, numDocs, numThreads, writer, LineDocFile, false);
                threads[x].Start();
            }

            for (int x = 0; x < threads.Length; x++)
            {
                threads[x].Join();
            }
            long maxRAMBytes = (long)(iwc.RAMBufferSizeMB * 1024.0 * 1024.0);
            Assert.AreEqual(0, flushControl.FlushBytes(), "all flushes must have completed; numThreads=" + numThreads);
            Assert.AreEqual(numDocumentsToIndex, writer.NumDocs());
            Assert.AreEqual(numDocumentsToIndex, writer.MaxDoc);
            Assert.IsTrue(flushPolicy.PeakBytesWithoutFlush <= maxRAMBytes, "peak bytes without flush exceeded watermark");
            AssertActiveBytesAfter(flushControl);
            if (flushPolicy.HasMarkedPending)
            {
                Assert.IsTrue(maxRAMBytes < flushControl.PeakActiveBytes);
            }
            if (ensureNotStalled)
            {
                Assert.IsFalse(docsWriter.FlushControl.StallControl.WasStalled());
            }
            writer.Dispose();
            Assert.AreEqual(0, flushControl.ActiveBytes());
            dir.Dispose();
        }
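A typical caller drives this harness with a small RAM buffer and several indexing threads so the RAM-based flush policy triggers repeatedly. A hedged sketch of such an invocation (the attribute and the concrete numbers are illustrative, not from this excerpt):

        // Illustrative caller only: 1-4 threads against a ~2 MB buffer.
        [Test]
        public virtual void TestFlushByRamSmallBuffer()
        {
            double ramMB = 2 + Random().NextDouble();
            RunFlushByRam(1 + Random().Next(4), ramMB, false);
        }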
Example No. 3
        public override void SetUp()
        {
            base.SetUp();

            // For now it's SimpleText vs Lucene46 (random postings format),
            // as this gives the best overall coverage. When we have more
            // codecs we should probably pick 2 from Codec.AvailableCodecs().

            LeftCodec = Codec.ForName("SimpleText");
            RightCodec = new RandomCodec(Random());

            LeftDir = NewDirectory();
            RightDir = NewDirectory();

            long seed = Random().Next();

            // must use same seed because of random payloads, etc
            int maxTermLength = TestUtil.NextInt(Random(), 1, IndexWriter.MAX_TERM_LENGTH);
            MockAnalyzer leftAnalyzer = new MockAnalyzer(new Random((int)seed));
            leftAnalyzer.MaxTokenLength = maxTermLength;
            MockAnalyzer rightAnalyzer = new MockAnalyzer(new Random((int)seed));
            rightAnalyzer.MaxTokenLength = maxTermLength;

            // but these can be different
            // TODO: this turns this into a really big test of Multi*, is that what we want?
            IndexWriterConfig leftConfig = NewIndexWriterConfig(TEST_VERSION_CURRENT, leftAnalyzer);
            leftConfig.SetCodec(LeftCodec);
            // preserve docids
            leftConfig.SetMergePolicy(NewLogMergePolicy());

            IndexWriterConfig rightConfig = NewIndexWriterConfig(TEST_VERSION_CURRENT, rightAnalyzer);
            rightConfig.SetCodec(RightCodec);
            // preserve docids
            rightConfig.SetMergePolicy(NewLogMergePolicy());

            // must use same seed because of random docvalues fields, etc
            RandomIndexWriter leftWriter = new RandomIndexWriter(new Random((int)seed), LeftDir, leftConfig);
            RandomIndexWriter rightWriter = new RandomIndexWriter(new Random((int)seed), RightDir, rightConfig);

            int numdocs = AtLeast(100);
            CreateRandomIndex(numdocs, leftWriter, seed);
            CreateRandomIndex(numdocs, rightWriter, seed);

            LeftReader = MaybeWrapReader(leftWriter.Reader);
            leftWriter.Dispose();
            RightReader = MaybeWrapReader(rightWriter.Reader);
            rightWriter.Dispose();

            // check that our readers are valid
            TestUtil.CheckReader(LeftReader);
            TestUtil.CheckReader(RightReader);

            Info = "left: " + LeftCodec.ToString() + " / right: " + RightCodec.ToString();
        }
        public virtual void TestLetterAscii()
        {
            Random random = Random();
            Analyzer left = new MockAnalyzer(random, jvmLetter, false);
            Analyzer right = new AnalyzerAnonymousInnerClassHelper(this);
            for (int i = 0; i < 1000; i++)
            {
                string s = TestUtil.RandomSimpleString(random);
                assertEquals(s, left.TokenStream("foo", new StringReader(s)), right.TokenStream("foo", new StringReader(s)));
            }
        }
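The AnalyzerAnonymousInnerClassHelper used above is not shown. In the Java original of this test the anonymous class wraps a LetterTokenizer so the left and right analyzers tokenize identically; a plausible reconstruction under that assumption (member names are guesses):

        private class AnalyzerAnonymousInnerClassHelper : Analyzer
        {
            private readonly LuceneTestCase outerInstance; // assumed outer test class

            public AnalyzerAnonymousInnerClassHelper(LuceneTestCase outerInstance)
            {
                this.outerInstance = outerInstance;
            }

            protected internal override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
            {
                // Tokenize on letters only, mirroring MockAnalyzer(random, jvmLetter, false)
                // on the left-hand side of the comparison.
                Tokenizer tokenizer = new LetterTokenizer(TEST_VERSION_CURRENT, reader);
                return new TokenStreamComponents(tokenizer, tokenizer);
            }
        }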
Example No. 5
        // Produces a realistic unicode random string that
        // survives MockAnalyzer unchanged:
        private string GetRandomTerm(string other)
        {
            Analyzer a = new MockAnalyzer(Random());
            while (true)
            {
                string s = TestUtil.RandomRealisticUnicodeString(Random());
                if (other != null && s.Equals(other))
                {
                    continue;
                }
                IOException priorException = null;
                TokenStream ts = a.TokenStream("foo", new StringReader(s));
                try
                {
                    ITermToBytesRefAttribute termAtt = ts.GetAttribute<ITermToBytesRefAttribute>();
                    BytesRef termBytes = termAtt.BytesRef;
                    ts.Reset();

                    int count = 0;
                    bool changed = false;

                    while (ts.IncrementToken())
                    {
                        termAtt.FillBytesRef();
                        if (count == 0 && !termBytes.Utf8ToString().Equals(s))
                        {
                            // The value was changed during analysis.  Keep iterating so the
                            // tokenStream is exhausted.
                            changed = true;
                        }
                        count++;
                    }

                    ts.End();
                    // Did we iterate just once and the value was unchanged?
                    if (!changed && count == 1)
                    {
                        return s;
                    }
                }
                catch (IOException e)
                {
                    priorException = e;
                }
                finally
                {
                    IOUtils.CloseWhileHandlingException(priorException, ts);
                }
            }
        }
        public virtual void TestTwoFieldsTwoFormats()
        {
            Analyzer analyzer = new MockAnalyzer(Random());

            Directory directory = NewDirectory();
            // we don't use RandomIndexWriter because it might add more docvalues than we expect!
            IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
            DocValuesFormat fast = DocValuesFormat.ForName("Lucene45");
            DocValuesFormat slow = DocValuesFormat.ForName("SimpleText"); // two distinct formats, per the test name
            iwc.SetCodec(new Lucene46CodecAnonymousInnerClassHelper(this, fast, slow));
            IndexWriter iwriter = new IndexWriter(directory, iwc);
            Document doc = new Document();
            string longTerm = "longtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongterm";
            string text = "this is the text to be indexed. " + longTerm;
            doc.Add(NewTextField("fieldname", text, Field.Store.YES));
            doc.Add(new NumericDocValuesField("dv1", 5));
            doc.Add(new BinaryDocValuesField("dv2", new BytesRef("hello world")));
            iwriter.AddDocument(doc);
            iwriter.Dispose();

            // Now search the index:
            IndexReader ireader = DirectoryReader.Open(directory); // read-only=true
            IndexSearcher isearcher = NewSearcher(ireader);

            Assert.AreEqual(1, isearcher.Search(new TermQuery(new Term("fieldname", longTerm)), 1).TotalHits);
            Query query = new TermQuery(new Term("fieldname", "text"));
            TopDocs hits = isearcher.Search(query, null, 1);
            Assert.AreEqual(1, hits.TotalHits);
            BytesRef scratch = new BytesRef();
            // Iterate through the results:
            for (int i = 0; i < hits.ScoreDocs.Length; i++)
            {
                Document hitDoc = isearcher.Doc(hits.ScoreDocs[i].Doc);
                Assert.AreEqual(text, hitDoc.Get("fieldname"));
                Debug.Assert(ireader.Leaves.Count == 1);
                NumericDocValues dv = ((AtomicReader)ireader.Leaves[0].Reader).GetNumericDocValues("dv1");
                Assert.AreEqual(5, dv.Get(hits.ScoreDocs[i].Doc));
                BinaryDocValues dv2 = ((AtomicReader)ireader.Leaves[0].Reader).GetBinaryDocValues("dv2");
                dv2.Get(hits.ScoreDocs[i].Doc, scratch);
                Assert.AreEqual(new BytesRef("hello world"), scratch);
            }

            ireader.Dispose();
            directory.Dispose();
        }
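Lucene46CodecAnonymousInnerClassHelper is not shown either. In the Java original it is an anonymous Lucene46Codec that routes each field to one of the two per-field formats; a minimal sketch under that assumption:

        private class Lucene46CodecAnonymousInnerClassHelper : Lucene46Codec
        {
            private readonly DocValuesFormat fast;
            private readonly DocValuesFormat slow;

            public Lucene46CodecAnonymousInnerClassHelper(object outerInstance, DocValuesFormat fast, DocValuesFormat slow)
            {
                // outerInstance only mirrors the converter's calling convention above; it is unused.
                this.fast = fast;
                this.slow = slow;
            }

            public override DocValuesFormat GetDocValuesFormatForField(string field)
            {
                // "dv1" gets the fast format; everything else (here "dv2") gets the slow one.
                return "dv1".Equals(field, StringComparison.Ordinal) ? fast : slow;
            }
        }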
Example No. 7
        public virtual void TestFloatNorms()
        {
            Directory dir = NewDirectory();
            MockAnalyzer analyzer = new MockAnalyzer(Random());
            analyzer.MaxTokenLength = TestUtil.NextInt(Random(), 1, IndexWriter.MAX_TERM_LENGTH);

            IndexWriterConfig config = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
            Similarity provider = new MySimProvider(this);
            config.SetSimilarity(provider);
            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, config);
            LineFileDocs docs = new LineFileDocs(Random());
            int num = AtLeast(100);
            for (int i = 0; i < num; i++)
            {
                Document doc = docs.NextDoc();
                float nextFloat = (float)Random().NextDouble();
                // Cast to a double to get more precision output to the string.
                Field f = new TextField(FloatTestField, "" + (double)nextFloat, Field.Store.YES);
                f.Boost = nextFloat;

                doc.Add(f);
                writer.AddDocument(doc);
                doc.RemoveField(FloatTestField);
                if (Rarely())
                {
                    writer.Commit();
                }
            }
            writer.Commit();
            writer.Dispose();
            AtomicReader open = SlowCompositeReaderWrapper.Wrap(DirectoryReader.Open(dir));
            NumericDocValues norms = open.GetNormValues(FloatTestField);
            Assert.IsNotNull(norms);
            for (int i = 0; i < open.MaxDoc; i++)
            {
                Document document = open.Document(i);
                float expected = Convert.ToSingle(document.Get(FloatTestField));
                Assert.AreEqual(expected, Number.IntBitsToFloat((int)norms.Get(i)), 0.0f);
            }
            open.Dispose();
            dir.Dispose();
            docs.Dispose();
        }
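MySimProvider is not shown in this excerpt. Judging by the Number.IntBitsToFloat check above, it is a PerFieldSimilarityWrapper whose similarity for FloatTestField stores the boost's raw float bits as the norm. A hedged sketch along those lines (method and helper names are assumptions based on the Java original):

        private class MySimProvider : PerFieldSimilarityWrapper
        {
            private readonly Similarity @delegate = new DefaultSimilarity();

            public MySimProvider(object outerInstance)
            {
                // outerInstance mirrors the call site above; it is not needed in this sketch.
            }

            public override float QueryNorm(float sumOfSquaredWeights)
            {
                return @delegate.QueryNorm(sumOfSquaredWeights);
            }

            public override float Coord(int overlap, int maxOverlap)
            {
                return @delegate.Coord(overlap, maxOverlap);
            }

            public override Similarity Get(string field)
            {
                // FloatTestField is the test's field-name member; assumed accessible
                // (e.g. const/static on the enclosing test class) for this sketch.
                return FloatTestField.Equals(field, StringComparison.Ordinal)
                    ? (Similarity)new FloatEncodingBoostSimilarity()
                    : @delegate;
            }
        }

        private class FloatEncodingBoostSimilarity : Similarity
        {
            public override long ComputeNorm(FieldInvertState state)
            {
                // Store the boost's raw bits, mirroring the Number.IntBitsToFloat decode above.
                return Number.FloatToIntBits(state.Boost);
            }

            // This similarity is only used at index time, so scoring is unsupported.
            public override SimWeight ComputeWeight(float queryBoost, CollectionStatistics collectionStats, params TermStatistics[] termStats)
            {
                throw new NotSupportedException();
            }

            public override SimScorer GetSimScorer(SimWeight weight, AtomicReaderContext context)
            {
                throw new NotSupportedException();
            }
        }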
        public virtual void TestLimitTokenCountAnalyzer()
        {
            foreach (bool consumeAll in new bool[] { true, false })
            {
                MockAnalyzer mock = new MockAnalyzer(Random());

                // if we are consuming all tokens, we can use the checks, 
                // otherwise we can't
                mock.EnableChecks = consumeAll;
                Analyzer a = new LimitTokenCountAnalyzer(mock, 2, consumeAll);

                // don't use assertAnalyzesTo here, as the end offset is not the end of the string (unless consumeAll is true, in which case it's correct)!
                AssertTokenStreamContents(a.TokenStream("dummy", "1  2     3  4  5"), new string[] { "1", "2" }, new int[] { 0, 3 }, new int[] { 1, 4 }, consumeAll ? (int?)16 : null);
                AssertTokenStreamContents(a.TokenStream("dummy", "1 2 3 4 5"), new string[] { "1", "2" }, new int[] { 0, 2 }, new int[] { 1, 3 }, consumeAll ? (int?)9 : null);

                // less than the limit, ensure we behave correctly
                AssertTokenStreamContents(a.TokenStream("dummy", "1  "), new string[] { "1" }, new int[] { 0 }, new int[] { 1 }, (consumeAll ? (int?)3 : null));

                // equal to limit
                AssertTokenStreamContents(a.TokenStream("dummy", "1  2  "), new string[] { "1", "2" }, new int[] { 0, 3 }, new int[] { 1, 4 }, consumeAll ? (int?)6 : null);
            }
        }
        public virtual void TestLimitTokenCountIndexWriter()
        {

            foreach (bool consumeAll in new bool[] { true, false })
            {
                Store.Directory dir = NewDirectory();
                int limit = TestUtil.NextInt(Random(), 50, 101000);
                MockAnalyzer mock = new MockAnalyzer(Random());

                // if we are consuming all tokens, we can use the checks, 
                // otherwise we can't
                mock.EnableChecks = consumeAll;
                Analyzer a = new LimitTokenCountAnalyzer(mock, limit, consumeAll);

                IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, a));

                Document doc = new Document();
                StringBuilder b = new StringBuilder();
                for (int i = 1; i < limit; i++)
                {
                    b.Append(" a");
                }
                b.Append(" x");
                b.Append(" z");
                doc.Add(NewTextField("field", b.ToString(), Field.Store.NO));
                writer.AddDocument(doc);
                writer.Dispose();

                IndexReader reader = DirectoryReader.Open(dir);
                Term t = new Term("field", "x");
                assertEquals(1, reader.DocFreq(t));
                t = new Term("field", "z");
                assertEquals(0, reader.DocFreq(t));
                reader.Dispose();
                dir.Dispose();
            }
        }
Example No. 11
        public virtual void TestStopWordSearching()
        {
            Analyzer analyzer = new MockAnalyzer(Random);

            using var ramDir = NewDirectory();
            using (IndexWriter iw = new IndexWriter(ramDir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer)))
            {
                Document doc = new Document();
                doc.Add(NewTextField("body", "blah the footest blah", Field.Store.NO));
                iw.AddDocument(doc);
            }

            MultiFieldQueryParser mfqp =
                new MultiFieldQueryParser(TEST_VERSION_CURRENT, new string[] { "body" }, analyzer);

            mfqp.DefaultOperator = Operator.AND;
            Query q = mfqp.Parse("the footest");

            using IndexReader ir = DirectoryReader.Open(ramDir);
            IndexSearcher @is = NewSearcher(ir);

            ScoreDoc[] hits = @is.Search(q, null, 1000).ScoreDocs;
            assertEquals(1, hits.Length);
        }
Example No. 12
        public virtual void TestBooleanSpanQuery()
        {
            int       hits            = 0;
            Directory directory       = NewDirectory();
            Analyzer  indexerAnalyzer = new MockAnalyzer(Random);

            IndexWriterConfig config = new IndexWriterConfig(TEST_VERSION_CURRENT, indexerAnalyzer);
            IndexWriter       writer = new IndexWriter(directory, config);
            string            FIELD  = "content";
            Document          d      = new Document();

            d.Add(new TextField(FIELD, "clockwork orange", Field.Store.YES));
            writer.AddDocument(d);
            writer.Dispose();

            IndexReader   indexReader = DirectoryReader.Open(directory);
            IndexSearcher searcher    = NewSearcher(indexReader);

            DisjunctionMaxQuery query = new DisjunctionMaxQuery(1.0f);
            SpanQuery           sq1   = new SpanTermQuery(new Term(FIELD, "clockwork"));
            SpanQuery           sq2   = new SpanTermQuery(new Term(FIELD, "clckwork"));

            query.Add(sq1);
            query.Add(sq2);
            TopScoreDocCollector collector = TopScoreDocCollector.Create(1000, true);

            searcher.Search(query, collector);
            hits = collector.GetTopDocs().ScoreDocs.Length;
            foreach (ScoreDoc scoreDoc in collector.GetTopDocs().ScoreDocs)
            {
                Console.WriteLine(scoreDoc.Doc);
            }
            indexReader.Dispose();
            Assert.AreEqual(1, hits);
            directory.Dispose();
        }
Example No. 13
            public NodeState(ShardSearchingTestBase outerInstance, Random random, int nodeID, int numNodes)
            {
                this.OuterInstance = outerInstance;
                MyNodeID           = nodeID;
                Dir = NewFSDirectory(CreateTempDir("ShardSearchingTestBase"));
                // TODO: set warmer
                MockAnalyzer analyzer = new MockAnalyzer(Random());

                analyzer.MaxTokenLength = TestUtil.NextInt(Random(), 1, IndexWriter.MAX_TERM_LENGTH);
                IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, analyzer);

                iwc.SetOpenMode(OpenMode.CREATE);
                if (VERBOSE)
                {
                    iwc.SetInfoStream(new TextWriterInfoStream(Console.Out));
                }
                Writer    = new IndexWriter(Dir, iwc);
                Mgr       = new SearcherManager(Writer, true, null);
                Searchers = new SearcherLifetimeManager();

                // Init w/ 0s... caller above will do initial
                // "broadcast" by calling initSearcher:
                CurrentNodeVersions = new long[numNodes];
            }
Example No. 14
        public virtual void TestStartPositions()
        {
            Directory dir = NewDirectory();

            // mimic StopAnalyzer
            CharacterRunAutomaton stopSet = new CharacterRunAutomaton((new RegExp("the|a|of")).ToAutomaton());
            Analyzer analyzer             = new MockAnalyzer(Random(), MockTokenizer.SIMPLE, true, stopSet);

            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, analyzer, Similarity, TimeZone);
            Document          doc    = new Document();

            doc.Add(NewTextField("field", "the quick brown fox", Field.Store.NO));
            writer.AddDocument(doc);
            Document doc2 = new Document();

            doc2.Add(NewTextField("field", "quick brown fox", Field.Store.NO));
            writer.AddDocument(doc2);

            IndexReader   reader   = writer.Reader;
            IndexSearcher searcher = NewSearcher(reader);

            // user queries on "starts-with quick"
            SpanQuery sfq = new SpanFirstQuery(new SpanTermQuery(new Term("field", "quick")), 1);

            Assert.AreEqual(1, searcher.Search(sfq, 10).TotalHits);

            // user queries on "starts-with the quick"
            SpanQuery include = new SpanFirstQuery(new SpanTermQuery(new Term("field", "quick")), 2);

            sfq = new SpanNotQuery(include, sfq);
            Assert.AreEqual(1, searcher.Search(sfq, 10).TotalHits);

            writer.Dispose();
            reader.Dispose();
            dir.Dispose();
        }
Example No. 15
        public void TestBoost()
        {
            CharacterRunAutomaton stopSet = new CharacterRunAutomaton(BasicAutomata.MakeString("on"));
            Analyzer oneStopAnalyzer      = new MockAnalyzer(Random(), MockTokenizer.SIMPLE, true, stopSet);

            PrecedenceQueryParser qp = new PrecedenceQueryParser();

            qp.Analyzer = (oneStopAnalyzer);
            Query q = qp.Parse("on^1.0", "field");

            assertNotNull(q);
            q = qp.Parse("\"hello\"^2.0", "field");
            assertNotNull(q);
            assertEquals(q.Boost, (float)2.0, (float)0.5);
            q = qp.Parse("hello^2.0", "field");
            assertNotNull(q);
            assertEquals(q.Boost, (float)2.0, (float)0.5);
            q = qp.Parse("\"on\"^1.0", "field");
            assertNotNull(q);

            q = GetParser(new MockAnalyzer(Random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET)).Parse("the^3",
                                                                                                                         "field");
            assertNotNull(q);
        }
Example No. 16
        // TODO: create a testNormsNotPresent ourselves by adding/deleting/merging docs

        public virtual void BuildIndex(Directory dir)
        {
            Random random = Random();
            MockAnalyzer analyzer = new MockAnalyzer(Random());
            analyzer.MaxTokenLength = TestUtil.NextInt(Random(), 1, IndexWriter.MAX_TERM_LENGTH);
            IndexWriterConfig config = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
            Similarity provider = new MySimProvider(this);
            config.SetSimilarity(provider);
            RandomIndexWriter writer = new RandomIndexWriter(random, dir, config);
            LineFileDocs docs = new LineFileDocs(random, DefaultCodecSupportsDocValues());
            int num = AtLeast(100);
            for (int i = 0; i < num; i++)
            {
                Document doc = docs.NextDoc();
                int boost = Random().Next(255);
                Field f = new TextField(ByteTestField, "" + boost, Field.Store.YES);
                f.Boost = boost;
                doc.Add(f);
                writer.AddDocument(doc);
                doc.RemoveField(ByteTestField);
                if (Rarely())
                {
                    writer.Commit();
                }
            }
            writer.Commit();
            writer.Dispose();
            docs.Dispose();
        }
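A verification pass for an index built this way mirrors the float-norms check in TestFloatNorms above. A hedged sketch, assuming the configured similarity wrote the boost byte directly as the norm:

        public virtual void VerifyByteNorms(Directory dir)
        {
            AtomicReader open = SlowCompositeReaderWrapper.Wrap(DirectoryReader.Open(dir));
            NumericDocValues norms = open.GetNormValues(ByteTestField);
            Assert.IsNotNull(norms);
            for (int i = 0; i < open.MaxDoc; i++)
            {
                // Each document stored its own boost as the field's text, so the norm
                // read back should equal the stored value.
                Document document = open.Document(i);
                int expected = Convert.ToInt32(document.Get(ByteTestField));
                Assert.AreEqual(expected, (int)norms.Get(i));
            }
            open.Dispose();
        }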
Example No. 17
        public virtual void TestGiga()
        {
            MockAnalyzer analyzer = new MockAnalyzer(Random());
            Directory index = NewDirectory();
            RandomIndexWriter w = new RandomIndexWriter(Random(), index);

            AddDoc("Lucene in Action", w);
            AddDoc("Lucene for Dummies", w);

            //addDoc("Giga", w);
            AddDoc("Giga byte", w);

            AddDoc("ManagingGigabytesManagingGigabyte", w);
            AddDoc("ManagingGigabytesManagingGigabytes", w);

            AddDoc("The Art of Computer Science", w);
            AddDoc("J. K. Rowling", w);
            AddDoc("JK Rowling", w);
            AddDoc("Joanne K Roling", w);
            AddDoc("Bruce Willis", w);
            AddDoc("Willis bruce", w);
            AddDoc("Brute willis", w);
            AddDoc("B. willis", w);
            IndexReader r = w.Reader;
            w.Dispose();

            Query q = new FuzzyQuery(new Term("field", "giga"), 0);

            // 3. search
            IndexSearcher searcher = NewSearcher(r);
            ScoreDoc[] hits = searcher.Search(q, 10).ScoreDocs;
            Assert.AreEqual(1, hits.Length);
            Assert.AreEqual("Giga byte", searcher.Doc(hits[0].Doc).Get("field"));
            r.Dispose();
            index.Dispose();
        }
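The AddDoc helper is not shown; inferred from its usage in TestGiga and the "field" term queried there, it most likely indexes a single stored text field:

        // Hypothetical helper, inferred from usage above.
        private void AddDoc(string text, RandomIndexWriter w)
        {
            Document doc = new Document();
            doc.Add(NewTextField("field", text, Field.Store.YES));
            w.AddDocument(doc);
        }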
        // [Test] // LUCENENET NOTE: For now, we are overriding this test in every subclass to pull it into the right context for the subclass
        public virtual void TestVeryLargeButLegalBytes()
        {
            Analyzer analyzer = new MockAnalyzer(Random());

            Directory directory = NewDirectory();
            IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
            conf.SetMergePolicy(NewLogMergePolicy());
            RandomIndexWriter iwriter = new RandomIndexWriter(Random(), directory, conf);
            Document doc = new Document();
            var bytes = new byte[32766];
            BytesRef b = new BytesRef(bytes);
            Random().NextBytes(bytes);
            doc.Add(new BinaryDocValuesField("dv", b));
            iwriter.AddDocument(doc);
            iwriter.Dispose();

            // Now search the index:
            IndexReader ireader = DirectoryReader.Open(directory); // read-only=true
            Debug.Assert(ireader.Leaves.Count == 1);
            BinaryDocValues dv = ((AtomicReader)ireader.Leaves[0].Reader).GetBinaryDocValues("dv");
            BytesRef scratch = new BytesRef();
            dv.Get(0, scratch);
            Assert.AreEqual(new BytesRef(bytes), scratch);

            ireader.Dispose();
            directory.Dispose();
        }
Example No. 19
        public void TestRandomNRT()
        {
            DirectoryInfo tempDir        = CreateTempDir("AnalyzingInfixSuggesterTest");
            Analyzer      a              = new MockAnalyzer(Random(), MockTokenizer.WHITESPACE, false);
            int           minPrefixChars = Random().nextInt(7);

            if (VERBOSE)
            {
                Console.WriteLine("  minPrefixChars=" + minPrefixChars);
            }

            AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, NewFSDirectory(tempDir), a, a, minPrefixChars);

            // Initial suggester built with nothing:
            suggester.Build(new InputArrayIterator(new Input[0]));

            var stop = new AtomicBoolean(false);

            Exception[] error = new Exception[] { null };

            LookupThread lookupThread = new LookupThread(this, suggester, stop, error);

            lookupThread.Start();

            int iters       = AtLeast(1000);
            int visibleUpto = 0;

            ISet <long>   usedWeights = new HashSet <long>();
            ISet <string> usedKeys    = new HashSet <string>();

            List <Input>  inputs         = new List <Input>();
            List <Update> pendingUpdates = new List <Update>();

            for (int iter = 0; iter < iters; iter++)
            {
                string text;
                while (true)
                {
                    text = RandomText();
                    if (usedKeys.contains(text) == false)
                    {
                        usedKeys.add(text);
                        break;
                    }
                }

                // Carefully pick a weight we never used, to sidestep
                // tie-break problems:
                long weight;
                while (true)
                {
                    weight = Random().nextInt(10 * iters);
                    if (usedWeights.contains(weight) == false)
                    {
                        usedWeights.add(weight);
                        break;
                    }
                }

                if (inputs.size() > 0 && Random().nextInt(4) == 1)
                {
                    // Update an existing suggestion
                    Update update = new Update();
                    update.index  = Random().nextInt(inputs.size());
                    update.weight = weight;
                    Input input = inputs.ElementAt(update.index);
                    pendingUpdates.Add(update);
                    if (VERBOSE)
                    {
                        Console.WriteLine("TEST: iter=" + iter + " update input=" + input.term.Utf8ToString() + "/" + weight);
                    }
                    suggester.Update(input.term, null, weight, input.term);
                }
                else
                {
                    // Add a new suggestion
                    inputs.Add(new Input(text, weight, new BytesRef(text)));
                    if (VERBOSE)
                    {
                        Console.WriteLine("TEST: iter=" + iter + " add input=" + text + "/" + weight);
                    }
                    BytesRef br = new BytesRef(text);
                    suggester.Add(br, null, weight, br);
                }

                if (Random().nextInt(15) == 7)
                {
                    if (VERBOSE)
                    {
                        Console.WriteLine("TEST: now refresh suggester");
                    }
                    suggester.Refresh();
                    visibleUpto = inputs.size();
                    foreach (Update update in pendingUpdates)
                    {
                        Input oldInput = inputs.ElementAt(update.index);
                        Input newInput = new Input(oldInput.term, update.weight, oldInput.payload);
                        inputs[update.index] = newInput;
                    }
                    pendingUpdates.Clear();
                }

                if (Random().nextInt(50) == 7)
                {
                    if (VERBOSE)
                    {
                        Console.WriteLine("TEST: now close/reopen suggester");
                    }
                    //lookupThread.Finish();
                    stop.Set(true);
                    lookupThread.Join();
                    Assert.Null(error[0], "Unexpected exception at retry : \n" + stackTraceStr(error[0]));
                    suggester.Dispose();
                    suggester    = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, NewFSDirectory(tempDir), a, a, minPrefixChars);
                    lookupThread = new LookupThread(this, suggester, stop, error);
                    lookupThread.Start();

                    visibleUpto = inputs.size();
                    foreach (Update update in pendingUpdates)
                    {
                        Input oldInput = inputs.ElementAt(update.index);
                        Input newInput = new Input(oldInput.term, update.weight, oldInput.payload);
                        inputs[update.index] = newInput;
                    }
                    pendingUpdates.Clear();
                }

                if (visibleUpto > 0)
                {
                    string query      = RandomText();
                    bool   lastPrefix = Random().nextInt(5) != 1;
                    if (lastPrefix == false)
                    {
                        query += " ";
                    }

                    string[] queryTerms       = Regex.Split(query, "\\s", RegexOptions.Compiled);
                    bool     allTermsRequired = Random().nextInt(10) == 7;
                    bool     doHilite         = Random().nextBoolean();

                    if (VERBOSE)
                    {
                        Console.WriteLine("TEST: lookup \"" + query + "\" allTermsRequired=" + allTermsRequired + " doHilite=" + doHilite);
                    }

                    // Stupid slow but hopefully correct matching:
                    List <Input> expected = new List <Input>();
                    for (int i = 0; i < visibleUpto; i++)
                    {
                        Input    input      = inputs.ElementAt(i);
                        string[] inputTerms = Regex.Split(input.term.Utf8ToString(), "\\s");
                        bool     match      = false;
                        for (int j = 0; j < queryTerms.Length; j++)
                        {
                            if (j < queryTerms.Length - 1 || lastPrefix == false)
                            {
                                // Exact match
                                for (int k = 0; k < inputTerms.Length; k++)
                                {
                                    if (inputTerms[k].equals(queryTerms[j]))
                                    {
                                        match = true;
                                        break;
                                    }
                                }
                            }
                            else
                            {
                                // Prefix match
                                for (int k = 0; k < inputTerms.Length; k++)
                                {
                                    if (inputTerms[k].StartsWith(queryTerms[j], StringComparison.InvariantCulture))
                                    {
                                        match = true;
                                        break;
                                    }
                                }
                            }
                            if (match)
                            {
                                if (allTermsRequired == false)
                                {
                                    // At least one query term does match:
                                    break;
                                }
                                match = false;
                            }
                            else if (allTermsRequired)
                            {
                                // At least one query term does not match:
                                break;
                            }
                        }

                        if (match)
                        {
                            if (doHilite)
                            {
                                expected.Add(new Input(Hilite(lastPrefix, inputTerms, queryTerms), input.v, input.term));
                            }
                            else
                            {
                                expected.Add(input);
                            }
                        }
                    }

                    expected.Sort(new TestRandomNRTComparator());

                    if (expected.Any())
                    {
                        int topN = TestUtil.NextInt(Random(), 1, expected.size());

                        IList <Lookup.LookupResult> actual = suggester.DoLookup(TestUtil.StringToCharSequence(query, Random()).ToString(), topN, allTermsRequired, doHilite);

                        int expectedCount = Math.Min(topN, expected.size());

                        if (VERBOSE)
                        {
                            Console.WriteLine("  expected:");
                            for (int i = 0; i < expectedCount; i++)
                            {
                                Input x = expected.ElementAt(i);
                                Console.WriteLine("    " + x.term.Utf8ToString() + "/" + x.v);
                            }
                            Console.WriteLine("  actual:");
                            foreach (Lookup.LookupResult result in actual)
                            {
                                Console.WriteLine("    " + result);
                            }
                        }

                        assertEquals(expectedCount, actual.size());
                        for (int i = 0; i < expectedCount; i++)
                        {
                            assertEquals(expected.ElementAt(i).term.Utf8ToString(), actual.ElementAt(i).key.toString());
                            assertEquals(expected.ElementAt(i).v, actual.ElementAt(i).value);
                            assertEquals(expected.ElementAt(i).payload, actual.ElementAt(i).payload);
                        }
                    }
                    else
                    {
                        if (VERBOSE)
                        {
                            Console.WriteLine("  no expected matches");
                        }
                    }
                }
            }

            //lookupThread.finish();
            stop.Set(true);
            lookupThread.Join();
            Assert.Null(error[0], "Unexpected exception at retry : \n" + stackTraceStr(error[0]));
            suggester.Dispose();
        }
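LookupThread is not shown. From the call sites above, it hammers the suggester with lookups on a background thread until stop is set, recording any exception in error[0] for the main thread to assert on. A hedged reconstruction (the wrapper shape and the AtomicBoolean.Get accessor, paired with the Set(true) calls above, are assumptions):

        private class LookupThread
        {
            private readonly AnalyzingInfixSuggester suggester;
            private readonly AtomicBoolean stop;
            private readonly Exception[] error;
            private readonly Thread thread;

            public LookupThread(object outerInstance, AnalyzingInfixSuggester suggester, AtomicBoolean stop, Exception[] error)
            {
                this.suggester = suggester;
                this.stop = stop;
                this.error = error;
                thread = new Thread(Run);
            }

            public void Start()
            {
                thread.Start();
            }

            public void Join()
            {
                thread.Join();
            }

            private void Run()
            {
                while (!stop.Get())
                {
                    try
                    {
                        // Any query text works; the point is concurrent lookups during adds/updates.
                        suggester.DoLookup("a", 10, true, false);
                    }
                    catch (Exception e)
                    {
                        error[0] = e;
                        break;
                    }
                }
            }
        }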
Example No. 20
        public void TestBasicContext()
        {
            Input[] keys = new Input[] {
                new Input("lend me your ear", 8, new BytesRef("foobar"), AsSet("foo", "bar")),
                new Input("a penny saved is a penny earned", 10, new BytesRef("foobaz"), AsSet("foo", "baz"))
            };

            DirectoryInfo tempDir = CreateTempDir("analyzingInfixContext");

            for (int iter = 0; iter < 2; iter++)
            {
                AnalyzingInfixSuggester suggester;
                Analyzer a = new MockAnalyzer(Random(), MockTokenizer.WHITESPACE, false);
                if (iter == 0)
                {
                    suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, NewFSDirectory(tempDir), a, a, 3);
                    suggester.Build(new InputArrayIterator(keys));
                }
                else
                {
                    // Test again, after close/reopen:
                    suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, NewFSDirectory(tempDir), a, a, 3);
                }

                // No context provided, all results returned
                IList <Lookup.LookupResult> results = suggester.DoLookup(TestUtil.StringToCharSequence("ear", Random()).ToString(), 10, true, true);
                assertEquals(2, results.size());
                Lookup.LookupResult result = results.ElementAt(0);
                assertEquals("a penny saved is a penny <b>ear</b>ned", result.key);
                assertEquals(10, result.value);
                assertEquals(new BytesRef("foobaz"), result.payload);
                assertNotNull(result.contexts);
                assertEquals(2, result.contexts.Count());
                assertTrue(result.contexts.Contains(new BytesRef("foo")));
                assertTrue(result.contexts.Contains(new BytesRef("baz")));

                result = results.ElementAt(1);
                assertEquals("lend me your <b>ear</b>", result.key);
                assertEquals(8, result.value);
                assertEquals(new BytesRef("foobar"), result.payload);
                assertNotNull(result.contexts);
                assertEquals(2, result.contexts.Count());
                assertTrue(result.contexts.Contains(new BytesRef("foo")));
                assertTrue(result.contexts.Contains(new BytesRef("bar")));

                // Both suggestions have "foo" context:
                results = suggester.DoLookup(TestUtil.StringToCharSequence("ear", Random()).ToString(), AsSet("foo"), 10, true, true);
                assertEquals(2, results.size());

                result = results.ElementAt(0);
                assertEquals("a penny saved is a penny <b>ear</b>ned", result.key);
                assertEquals(10, result.value);
                assertEquals(new BytesRef("foobaz"), result.payload);
                assertNotNull(result.contexts);
                assertEquals(2, result.contexts.Count());
                assertTrue(result.contexts.Contains(new BytesRef("foo")));
                assertTrue(result.contexts.Contains(new BytesRef("baz")));

                result = results.ElementAt(1);
                assertEquals("lend me your <b>ear</b>", result.key);
                assertEquals(8, result.value);
                assertEquals(new BytesRef("foobar"), result.payload);
                assertNotNull(result.contexts);
                assertEquals(2, result.contexts.Count());
                assertTrue(result.contexts.Contains(new BytesRef("foo")));
                assertTrue(result.contexts.Contains(new BytesRef("bar")));

                // Only one has "bar" context:
                results = suggester.DoLookup(TestUtil.StringToCharSequence("ear", Random()).ToString(), AsSet("bar"), 10, true, true);
                assertEquals(1, results.size());

                result = results.ElementAt(0);
                assertEquals("lend me your <b>ear</b>", result.key);
                assertEquals(8, result.value);
                assertEquals(new BytesRef("foobar"), result.payload);
                assertNotNull(result.contexts);
                assertEquals(2, result.contexts.Count());
                assertTrue(result.contexts.Contains(new BytesRef("foo")));
                assertTrue(result.contexts.Contains(new BytesRef("bar")));

                // Only one has "baz" context:
                results = suggester.DoLookup(TestUtil.StringToCharSequence("ear", Random()).ToString(), AsSet("baz"), 10, true, true);
                assertEquals(1, results.size());

                result = results.ElementAt(0);
                assertEquals("a penny saved is a penny <b>ear</b>ned", result.key);
                assertEquals(10, result.value);
                assertEquals(new BytesRef("foobaz"), result.payload);
                assertNotNull(result.contexts);
                assertEquals(2, result.contexts.Count());
                assertTrue(result.contexts.Contains(new BytesRef("foo")));
                assertTrue(result.contexts.Contains(new BytesRef("baz")));

                // Both have foo or bar:
                results = suggester.DoLookup(TestUtil.StringToCharSequence("ear", Random()).ToString(), AsSet("foo", "bar"), 10, true, true);
                assertEquals(2, results.size());

                result = results.ElementAt(0);
                assertEquals("a penny saved is a penny <b>ear</b>ned", result.key);
                assertEquals(10, result.value);
                assertEquals(new BytesRef("foobaz"), result.payload);
                assertNotNull(result.contexts);
                assertEquals(2, result.contexts.Count());
                assertTrue(result.contexts.Contains(new BytesRef("foo")));
                assertTrue(result.contexts.Contains(new BytesRef("baz")));

                result = results.ElementAt(1);
                assertEquals("lend me your <b>ear</b>", result.key);
                assertEquals(8, result.value);
                assertEquals(new BytesRef("foobar"), result.payload);
                assertNotNull(result.contexts);
                assertEquals(2, result.contexts.Count());
                assertTrue(result.contexts.Contains(new BytesRef("foo")));
                assertTrue(result.contexts.Contains(new BytesRef("bar")));

                suggester.Dispose();
            }
        }
Example No. 21
        public virtual void TestSortedTermsEnum()
        {
            Directory         directory = NewDirectory();
            Analyzer          analyzer  = new MockAnalyzer(Random);
            IndexWriterConfig iwconfig  = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);

            iwconfig.SetMergePolicy(NewLogMergePolicy());
            RandomIndexWriter iwriter = new RandomIndexWriter(Random, directory, iwconfig);

            Document doc = new Document();

            doc.Add(new StringField("field", "hello", Field.Store.NO));
            iwriter.AddDocument(doc);

            doc = new Document();
            doc.Add(new StringField("field", "world", Field.Store.NO));
            iwriter.AddDocument(doc);

            doc = new Document();
            doc.Add(new StringField("field", "beer", Field.Store.NO));
            iwriter.AddDocument(doc);
            iwriter.ForceMerge(1);

            DirectoryReader ireader = iwriter.GetReader();

            iwriter.Dispose();

            AtomicReader       ar = GetOnlySegmentReader(ireader);
            SortedSetDocValues dv = FieldCache.DEFAULT.GetDocTermOrds(ar, "field");

            Assert.AreEqual(3, dv.ValueCount);

            TermsEnum termsEnum = dv.GetTermsEnum();

            // next()
            Assert.AreEqual("beer", termsEnum.Next().Utf8ToString());
            Assert.AreEqual(0, termsEnum.Ord);
            Assert.AreEqual("hello", termsEnum.Next().Utf8ToString());
            Assert.AreEqual(1, termsEnum.Ord);
            Assert.AreEqual("world", termsEnum.Next().Utf8ToString());
            Assert.AreEqual(2, termsEnum.Ord);

            // seekCeil()
            Assert.AreEqual(SeekStatus.NOT_FOUND, termsEnum.SeekCeil(new BytesRef("ha!")));
            Assert.AreEqual("hello", termsEnum.Term.Utf8ToString());
            Assert.AreEqual(1, termsEnum.Ord);
            Assert.AreEqual(SeekStatus.FOUND, termsEnum.SeekCeil(new BytesRef("beer")));
            Assert.AreEqual("beer", termsEnum.Term.Utf8ToString());
            Assert.AreEqual(0, termsEnum.Ord);
            Assert.AreEqual(SeekStatus.END, termsEnum.SeekCeil(new BytesRef("zzz")));

            // seekExact()
            Assert.IsTrue(termsEnum.SeekExact(new BytesRef("beer")));
            Assert.AreEqual("beer", termsEnum.Term.Utf8ToString());
            Assert.AreEqual(0, termsEnum.Ord);
            Assert.IsTrue(termsEnum.SeekExact(new BytesRef("hello")));
            Assert.AreEqual("hello", termsEnum.Term.Utf8ToString());
            Assert.AreEqual(1, termsEnum.Ord);
            Assert.IsTrue(termsEnum.SeekExact(new BytesRef("world")));
            Assert.AreEqual("world", termsEnum.Term.Utf8ToString());
            Assert.AreEqual(2, termsEnum.Ord);
            Assert.IsFalse(termsEnum.SeekExact(new BytesRef("bogus")));

            // seek(ord)
            termsEnum.SeekExact(0);
            Assert.AreEqual("beer", termsEnum.Term.Utf8ToString());
            Assert.AreEqual(0, termsEnum.Ord);
            termsEnum.SeekExact(1);
            Assert.AreEqual("hello", termsEnum.Term.Utf8ToString());
            Assert.AreEqual(1, termsEnum.Ord);
            termsEnum.SeekExact(2);
            Assert.AreEqual("world", termsEnum.Term.Utf8ToString());
            Assert.AreEqual(2, termsEnum.Ord);
            ireader.Dispose();
            directory.Dispose();
        }
        public virtual void TestTooLargeTermSortedSetBytes()
        {
            AssumeTrue("codec does not support SORTED_SET", DefaultCodecSupportsSortedSet());
            Analyzer analyzer = new MockAnalyzer(Random());

            Directory directory = NewDirectory();
            // we don't use RandomIndexWriter because it might add more docvalues than we expect!
            IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
            iwc.SetMergePolicy(NewLogMergePolicy());
            IndexWriter iwriter = new IndexWriter(directory, iwc);
            Document doc = new Document();
            byte[] bytes = new byte[100000];
            BytesRef b = new BytesRef(bytes);
            Random().NextBytes(bytes);
            doc.Add(new SortedSetDocValuesField("dv", b));
            try
            {
                iwriter.AddDocument(doc);
                Assert.Fail("did not get expected exception");
            }
            catch (System.ArgumentException)
            {
                // expected
            }
            iwriter.Dispose();
            directory.Dispose();
        }
Example No. 23
        public void TestNOT()
        {
            Analyzer a = new MockAnalyzer(Random, MockTokenizer.WHITESPACE, false);

            assertQueryEquals("NOT foo AND bar", a, "-foo +bar");
        }
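assertQueryEquals is the standard query-parser test helper; it appears to come from the same suite as the TestBoost example above. A minimal sketch, assuming a GetParser(Analyzer) factory like the one used there:

        private void assertQueryEquals(string query, Analyzer a, string result)
        {
            // Parse with the given analyzer and compare the query's string form
            // for the default field.
            Query q = GetParser(a).Parse(query, "field");
            string s = q.ToString("field");
            if (!s.Equals(result, StringComparison.Ordinal))
            {
                fail("Query /" + query + "/ yielded /" + s + "/, expecting /" + result + "/");
            }
        }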
Example No. 24
        public virtual void TestEndOffsetPositionWithCachingTokenFilter()
        {
            Directory dir = NewDirectory();
            Analyzer analyzer = new MockAnalyzer(Random());
            IndexWriter w = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
            Document doc = new Document();
            IOException priorException = null;
            TokenStream stream = analyzer.TokenStream("field", new StringReader("abcd    "));
            try
            {
                stream.Reset(); // TODO: weird to reset before wrapping with CachingTokenFilter... correct?
                TokenStream cachedStream = new CachingTokenFilter(stream);
                FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
                customType.StoreTermVectors = true;
                customType.StoreTermVectorPositions = true;
                customType.StoreTermVectorOffsets = true;
                Field f = new Field("field", cachedStream, customType);
                doc.Add(f);
                doc.Add(f);
                w.AddDocument(doc);
            }
            catch (IOException e)
            {
                priorException = e;
            }
            finally
            {
                IOUtils.CloseWhileHandlingException(priorException, stream);
            }
            w.Dispose();

            IndexReader r = DirectoryReader.Open(dir);
            TermsEnum termsEnum = r.GetTermVectors(0).Terms("field").Iterator(null);
            Assert.IsNotNull(termsEnum.Next());
            DocsAndPositionsEnum dpEnum = termsEnum.DocsAndPositions(null, null);
            Assert.AreEqual(2, termsEnum.TotalTermFreq());

            Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
            dpEnum.NextPosition();
            Assert.AreEqual(0, dpEnum.StartOffset());
            Assert.AreEqual(4, dpEnum.EndOffset());

            dpEnum.NextPosition();
            Assert.AreEqual(8, dpEnum.StartOffset());
            Assert.AreEqual(12, dpEnum.EndOffset());
            Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dpEnum.NextDoc());

            r.Dispose();
            dir.Dispose();
        }
Example No. 25
        public virtual void TestRollingUpdates_Mem()
        {
            Random random             = new Random(Random().Next());
            BaseDirectoryWrapper dir  = NewDirectory();
            LineFileDocs         docs = new LineFileDocs(random, DefaultCodecSupportsDocValues());

            //provider.register(new MemoryCodec());
            if ((!"Lucene3x".Equals(Codec.Default.Name, StringComparison.Ordinal)) && Random().NextBoolean())
            {
                Codec.Default =
                    TestUtil.AlwaysPostingsFormat(new MemoryPostingsFormat(Random().NextBoolean(), random.NextFloat()));
            }

            MockAnalyzer analyzer = new MockAnalyzer(Random());

            analyzer.MaxTokenLength = TestUtil.NextInt(Random(), 1, IndexWriter.MAX_TERM_LENGTH);

            IndexWriter   w          = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
            int           SIZE       = AtLeast(20);
            int           id         = 0;
            IndexReader   r          = null;
            IndexSearcher s          = null;
            int           numUpdates = (int)(SIZE * (2 + (TEST_NIGHTLY ? 200 * Random().NextDouble() : 5 * Random().NextDouble())));

            if (VERBOSE)
            {
                Console.WriteLine("TEST: numUpdates=" + numUpdates);
            }
            int updateCount = 0;

            // TODO: sometimes update ids not in order...
            for (int docIter = 0; docIter < numUpdates; docIter++)
            {
                Documents.Document doc  = docs.NextDoc();
                string             myID = "" + id;
                if (id == SIZE - 1)
                {
                    id = 0;
                }
                else
                {
                    id++;
                }
                if (VERBOSE)
                {
                    Console.WriteLine("  docIter=" + docIter + " id=" + id);
                }
                ((Field)doc.GetField("docid")).SetStringValue(myID);

                Term idTerm = new Term("docid", myID);

                bool doUpdate;
                if (s != null && updateCount < SIZE)
                {
                    TopDocs hits = s.Search(new TermQuery(idTerm), 1);
                    Assert.AreEqual(1, hits.TotalHits);
                    doUpdate = !w.TryDeleteDocument(r, hits.ScoreDocs[0].Doc);
                    if (VERBOSE)
                    {
                        if (doUpdate)
                        {
                            Console.WriteLine("  tryDeleteDocument failed");
                        }
                        else
                        {
                            Console.WriteLine("  tryDeleteDocument succeeded");
                        }
                    }
                }
                else
                {
                    doUpdate = true;
                    if (VERBOSE)
                    {
                        Console.WriteLine("  no searcher: doUpdate=true");
                    }
                }

                updateCount++;

                if (doUpdate)
                {
                    w.UpdateDocument(idTerm, doc);
                }
                else
                {
                    w.AddDocument(doc);
                }

                if (docIter >= SIZE && Random().Next(50) == 17)
                {
                    if (r != null)
                    {
                        r.Dispose();
                    }

                    bool applyDeletions = Random().NextBoolean();

                    if (VERBOSE)
                    {
                        Console.WriteLine("TEST: reopen applyDeletions=" + applyDeletions);
                    }

                    r = w.GetReader(applyDeletions);
                    if (applyDeletions)
                    {
                        s = NewSearcher(r);
                    }
                    else
                    {
                        s = null;
                    }
                    Assert.IsTrue(!applyDeletions || r.NumDocs == SIZE, "applyDeletions=" + applyDeletions + " r.NumDocs=" + r.NumDocs + " vs SIZE=" + SIZE);
                    updateCount = 0;
                }
            }

            if (r != null)
            {
                r.Dispose();
            }

            w.Commit();
            Assert.AreEqual(SIZE, w.NumDocs);

            w.Dispose();

            TestIndexWriter.AssertNoUnreferencedFiles(dir, "leftover files after rolling updates");

            docs.Dispose();

            // LUCENE-4455:
            SegmentInfos infos = new SegmentInfos();

            infos.Read(dir);
            long totalBytes = 0;

            foreach (SegmentCommitInfo sipc in infos.Segments)
            {
                totalBytes += sipc.GetSizeInBytes();
            }
            long totalBytes2 = 0;

            foreach (string fileName in dir.ListAll())
            {
                if (!fileName.StartsWith(IndexFileNames.SEGMENTS, StringComparison.Ordinal))
                {
                    totalBytes2 += dir.FileLength(fileName);
                }
            }
            Assert.AreEqual(totalBytes2, totalBytes);
            dir.Dispose();
        }
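        // Hedged aside (not part of the original test): the loop above leans on
        // IndexWriter.TryDeleteDocument, which only succeeds while the segment
        // holding that docID is still live; after a merge it returns false and
        // the test falls back to UpdateDocument. A minimal sketch of that upsert
        // pattern, assuming the Lucene.NET 4.8 API (the helper name is ours):
        private static void UpsertByDocId(IndexWriter w, IndexReader r, IndexSearcher s, Term idTerm, Document doc)
        {
            TopDocs hits = s.Search(new TermQuery(idTerm), 1);
            if (hits.TotalHits == 1 && w.TryDeleteDocument(r, hits.ScoreDocs[0].Doc))
            {
                w.AddDocument(doc);            // by-docID delete stuck; just add the new version
            }
            else
            {
                w.UpdateDocument(idTerm, doc); // segment changed; atomic delete-by-term + add
            }
        }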
        // [Test] // LUCENENET NOTE: For now, we are overriding this test in every subclass to pull it into the right context for the subclass
        public virtual void TestMissingSortedBytes()
        {
            Analyzer analyzer = new MockAnalyzer(Random());

            Directory directory = NewDirectory();
            IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
            conf.SetMergePolicy(NewLogMergePolicy());
            RandomIndexWriter iwriter = new RandomIndexWriter(Random(), directory, conf);
            Document doc = new Document();
            doc.Add(new SortedDocValuesField("dv", new BytesRef("hello world 2")));
            iwriter.AddDocument(doc);
            // 2nd doc missing the DV field
            iwriter.AddDocument(new Document());
            iwriter.Dispose();

            // Now search the index:
            IndexReader ireader = DirectoryReader.Open(directory); // read-only=true
            Debug.Assert(ireader.Leaves.Count == 1);
            SortedDocValues dv = ((AtomicReader)ireader.Leaves[0].Reader).GetSortedDocValues("dv");
            BytesRef scratch = new BytesRef();
            dv.LookupOrd(dv.GetOrd(0), scratch);
            Assert.AreEqual(new BytesRef("hello world 2"), scratch);
            if (DefaultCodecSupportsDocsWithField())
            {
                Assert.AreEqual(-1, dv.GetOrd(1));
            }
            dv.Get(1, scratch);
            Assert.AreEqual(new BytesRef(""), scratch);
            ireader.Dispose();
            directory.Dispose();
        }
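        // Hedged aside: the ord == -1 check above is how SortedDocValues reports
        // a missing value. One might assert the same thing via GetDocsWithField,
        // assuming the Lucene.NET 4.8 AtomicReader API (the helper name is ours):
        private static bool HasDocValue(AtomicReader reader, string field, int docId)
        {
            IBits docsWithField = reader.GetDocsWithField(field); // null if no doc has the field
            return docsWithField != null && docsWithField.Get(docId);
        }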
Example #27
        public virtual void TestNRTAndCommit()
        {
            Directory           dir       = NewDirectory();
            NRTCachingDirectory cachedDir = new NRTCachingDirectory(dir, 2.0, 25.0);
            MockAnalyzer        analyzer  = new MockAnalyzer(Random);

            analyzer.MaxTokenLength = TestUtil.NextInt32(Random, 1, IndexWriter.MAX_TERM_LENGTH);
            IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
            RandomIndexWriter w    = new RandomIndexWriter(Random, cachedDir, conf);
            LineFileDocs      docs = new LineFileDocs(Random, DefaultCodecSupportsDocValues);
            int numDocs            = TestUtil.NextInt32(Random, 100, 400);

            if (Verbose)
            {
                Console.WriteLine("TEST: numDocs=" + numDocs);
            }

            IList <BytesRef> ids = new List <BytesRef>();
            DirectoryReader  r   = null;

            for (int docCount = 0; docCount < numDocs; docCount++)
            {
                Document doc = docs.NextDoc();
                ids.Add(new BytesRef(doc.Get("docid")));
                w.AddDocument(doc);
                if (Random.Next(20) == 17)
                {
                    if (r == null)
                    {
                        r = DirectoryReader.Open(w.IndexWriter, false);
                    }
                    else
                    {
                        DirectoryReader r2 = DirectoryReader.OpenIfChanged(r);
                        if (r2 != null)
                        {
                            r.Dispose();
                            r = r2;
                        }
                    }
                    Assert.AreEqual(1 + docCount, r.NumDocs);
                    IndexSearcher s = NewSearcher(r);
                    // Just make sure search can run; we can't assert
                    // totHits since it could be 0
                    TopDocs hits = s.Search(new TermQuery(new Term("body", "the")), 10);
                    // Console.WriteLine("tot hits " + hits.TotalHits);
                }
            }

            if (r != null)
            {
                r.Dispose();
            }

            // Close should force cache to clear since all files are sync'd
            w.Dispose();

            string[] cachedFiles = cachedDir.ListCachedFiles();
            foreach (string file in cachedFiles)
            {
                Console.WriteLine("FAIL: cached file " + file + " remains after sync");
            }
            Assert.AreEqual(0, cachedFiles.Length);

            r = DirectoryReader.Open(dir);
            foreach (BytesRef id in ids)
            {
                Assert.AreEqual(1, r.DocFreq(new Term("docid", id)));
            }
            r.Dispose();
            cachedDir.Dispose();
            docs.Dispose();
        }
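        // Hedged aside: the (2.0, 25.0) constructor arguments above are
        // NRTCachingDirectory's thresholds -- per the Lucene docs, newly written
        // files are cached in RAM only while the expected segment/merge size is
        // <= maxMergeSizeMB, and the cache as a whole is capped at maxCachedMB.
        // A small usage sketch (the helper name is ours):
        private static Directory NewNrtCachingDirectory(DirectoryInfo path)
        {
            Directory fsDir = FSDirectory.Open(path);
            // Small NRT segments stay in RAM; big flushes/merges go straight to disk.
            return new NRTCachingDirectory(fsDir, 2.0, 25.0); // maxMergeSizeMB, maxCachedMB
        }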
        public virtual void TestAddNumericTwice()
        {
            Analyzer analyzer = new MockAnalyzer(Random());

            Directory directory = NewDirectory();
            // we don't use RandomIndexWriter because it might add more docvalues than we expect !!!!1
            IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
            iwc.SetMergePolicy(NewLogMergePolicy());
            IndexWriter iwriter = new IndexWriter(directory, iwc);
            Document doc = new Document();
            doc.Add(new NumericDocValuesField("dv", 1));
            doc.Add(new NumericDocValuesField("dv", 2));
            try
            {
                iwriter.AddDocument(doc);
                Assert.Fail("didn't hit expected exception");
            }
            catch (System.ArgumentException expected)
            {
                // expected
            }

            iwriter.Dispose();
            directory.Dispose();
        }
Example #29
        public virtual void Test()
        {
            Random       random   = new Random(Random.Next());
            LineFileDocs docs     = new LineFileDocs(random, DefaultCodecSupportsDocValues);
            Directory    d        = NewDirectory();
            MockAnalyzer analyzer = new MockAnalyzer(LuceneTestCase.Random);

            analyzer.MaxTokenLength = TestUtil.NextInt32(LuceneTestCase.Random, 1, IndexWriter.MAX_TERM_LENGTH);
            RandomIndexWriter w = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                LuceneTestCase.Random, d, analyzer);
            int numDocs = AtLeast(10);

            for (int docCount = 0; docCount < numDocs; docCount++)
            {
                w.AddDocument(docs.NextDoc());
            }
            IndexReader r = w.GetReader();

            w.Dispose();

            List <BytesRef> terms     = new List <BytesRef>();
            TermsEnum       termsEnum = MultiFields.GetTerms(r, "body").GetIterator(null);
            BytesRef        term;

            while ((term = termsEnum.Next()) != null)
            {
                terms.Add(BytesRef.DeepCopyOf(term));
            }
            if (VERBOSE)
            {
                Console.WriteLine("TEST: " + terms.Count + " terms");
            }

            int upto  = -1;
            int iters = AtLeast(200);

            for (int iter = 0; iter < iters; iter++)
            {
                bool isEnd;
                if (upto != -1 && LuceneTestCase.Random.NextBoolean())
                {
                    // next
                    if (VERBOSE)
                    {
                        Console.WriteLine("TEST: iter next");
                    }
                    isEnd = termsEnum.Next() == null;
                    upto++;
                    if (isEnd)
                    {
                        if (VERBOSE)
                        {
                            Console.WriteLine("  end");
                        }
                        Assert.AreEqual(upto, terms.Count);
                        upto = -1;
                    }
                    else
                    {
                        if (VERBOSE)
                        {
                            Console.WriteLine("  got term=" + termsEnum.Term.Utf8ToString() + " expected=" + terms[upto].Utf8ToString());
                        }
                        Assert.IsTrue(upto < terms.Count);
                        Assert.AreEqual(terms[upto], termsEnum.Term);
                    }
                }
                else
                {
                    BytesRef target;
                    string   exists;
                    if (LuceneTestCase.Random.NextBoolean())
                    {
                        // likely fake term
                        if (LuceneTestCase.Random.NextBoolean())
                        {
                            target = new BytesRef(TestUtil.RandomSimpleString(LuceneTestCase.Random));
                        }
                        else
                        {
                            target = new BytesRef(TestUtil.RandomRealisticUnicodeString(LuceneTestCase.Random));
                        }
                        exists = "likely not";
                    }
                    else
                    {
                        // real term
                        target = terms[LuceneTestCase.Random.Next(terms.Count)];
                        exists = "yes";
                    }

                    upto = terms.BinarySearch(target);

                    if (LuceneTestCase.Random.NextBoolean())
                    {
                        if (VERBOSE)
                        {
                            Console.WriteLine("TEST: iter seekCeil target=" + target.Utf8ToString() + " exists=" + exists);
                        }
                        // seekCeil
                        TermsEnum.SeekStatus status = termsEnum.SeekCeil(target);
                        if (VERBOSE)
                        {
                            Console.WriteLine("  got " + status);
                        }

                        if (upto < 0)
                        {
                            upto = -(upto + 1);
                            if (upto >= terms.Count)
                            {
                                Assert.AreEqual(TermsEnum.SeekStatus.END, status);
                                upto = -1;
                            }
                            else
                            {
                                Assert.AreEqual(TermsEnum.SeekStatus.NOT_FOUND, status);
                                Assert.AreEqual(terms[upto], termsEnum.Term);
                            }
                        }
                        else
                        {
                            Assert.AreEqual(TermsEnum.SeekStatus.FOUND, status);
                            Assert.AreEqual(terms[upto], termsEnum.Term);
                        }
                    }
                    else
                    {
                        if (VERBOSE)
                        {
                            Console.WriteLine("TEST: iter seekExact target=" + target.Utf8ToString() + " exists=" + exists);
                        }
                        // seekExact
                        bool result = termsEnum.SeekExact(target);
                        if (VERBOSE)
                        {
                            Console.WriteLine("  got " + result);
                        }
                        if (upto < 0)
                        {
                            Assert.IsFalse(result);
                            upto = -1;
                        }
                        else
                        {
                            Assert.IsTrue(result);
                            Assert.AreEqual(target, termsEnum.Term);
                        }
                    }
                }
            }

            r.Dispose();
            d.Dispose();
            docs.Dispose();
        }
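        // Hedged aside: the -(upto + 1) dance above works because
        // List<T>.BinarySearch returns the bitwise complement of the insertion
        // point when the key is absent, so -(upto + 1) (equivalently ~upto) is
        // the index of the smallest term >= target -- exactly the term SeekCeil
        // must land on for NOT_FOUND. A sketch (the helper name is ours):
        private static int CeilIndex(List<BytesRef> sortedTerms, BytesRef target)
        {
            int idx = sortedTerms.BinarySearch(target);
            return idx >= 0 ? idx : ~idx; // may equal sortedTerms.Count => SeekStatus.END
        }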
        // [Test] // LUCENENET NOTE: For now, we are overriding this test in every subclass to pull it into the right context for the subclass
        public virtual void TestSortedTermsEnum()
        {
            Directory directory = NewDirectory();
            Analyzer analyzer = new MockAnalyzer(Random());
            IndexWriterConfig iwconfig = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
            iwconfig.SetMergePolicy(NewLogMergePolicy());
            RandomIndexWriter iwriter = new RandomIndexWriter(Random(), directory, iwconfig);

            Document doc = new Document();
            doc.Add(new SortedDocValuesField("field", new BytesRef("hello")));
            iwriter.AddDocument(doc);

            doc = new Document();
            doc.Add(new SortedDocValuesField("field", new BytesRef("world")));
            iwriter.AddDocument(doc);

            doc = new Document();
            doc.Add(new SortedDocValuesField("field", new BytesRef("beer")));
            iwriter.AddDocument(doc);
            iwriter.ForceMerge(1);

            DirectoryReader ireader = iwriter.Reader;
            iwriter.Dispose();

            SortedDocValues dv = GetOnlySegmentReader(ireader).GetSortedDocValues("field");
            Assert.AreEqual(3, dv.ValueCount);

            TermsEnum termsEnum = dv.TermsEnum();

            // next()
            Assert.AreEqual("beer", termsEnum.Next().Utf8ToString());
            Assert.AreEqual(0, termsEnum.Ord());
            Assert.AreEqual("hello", termsEnum.Next().Utf8ToString());
            Assert.AreEqual(1, termsEnum.Ord());
            Assert.AreEqual("world", termsEnum.Next().Utf8ToString());
            Assert.AreEqual(2, termsEnum.Ord());

            // seekCeil()
            Assert.AreEqual(SeekStatus.NOT_FOUND, termsEnum.SeekCeil(new BytesRef("ha!")));
            Assert.AreEqual("hello", termsEnum.Term().Utf8ToString());
            Assert.AreEqual(1, termsEnum.Ord());
            Assert.AreEqual(SeekStatus.FOUND, termsEnum.SeekCeil(new BytesRef("beer")));
            Assert.AreEqual("beer", termsEnum.Term().Utf8ToString());
            Assert.AreEqual(0, termsEnum.Ord());
            Assert.AreEqual(SeekStatus.END, termsEnum.SeekCeil(new BytesRef("zzz")));

            // seekExact()
            Assert.IsTrue(termsEnum.SeekExact(new BytesRef("beer")));
            Assert.AreEqual("beer", termsEnum.Term().Utf8ToString());
            Assert.AreEqual(0, termsEnum.Ord());
            Assert.IsTrue(termsEnum.SeekExact(new BytesRef("hello")));
            Assert.AreEqual("hello", termsEnum.Term().Utf8ToString());
            Assert.AreEqual(1, termsEnum.Ord());
            Assert.IsTrue(termsEnum.SeekExact(new BytesRef("world")));
            Assert.AreEqual("world", termsEnum.Term().Utf8ToString());
            Assert.AreEqual(2, termsEnum.Ord());
            Assert.IsFalse(termsEnum.SeekExact(new BytesRef("bogus")));

            // seek(ord)
            termsEnum.SeekExact(0);
            Assert.AreEqual("beer", termsEnum.Term().Utf8ToString());
            Assert.AreEqual(0, termsEnum.Ord());
            termsEnum.SeekExact(1);
            Assert.AreEqual("hello", termsEnum.Term().Utf8ToString());
            Assert.AreEqual(1, termsEnum.Ord());
            termsEnum.SeekExact(2);
            Assert.AreEqual("world", termsEnum.Term().Utf8ToString());
            Assert.AreEqual(2, termsEnum.Ord());
            ireader.Dispose();
            directory.Dispose();
        }
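        // Hedged aside: ords mirror sorted term order (beer=0, hello=1, world=2),
        // and LookupOrd is the inverse of the seeks above. A tiny sketch against
        // the same 4.8 API (the helper name is ours):
        private static string TermForOrd(SortedDocValues dv, int ord)
        {
            BytesRef spare = new BytesRef();
            dv.LookupOrd(ord, spare); // fills spare with the ord'th term in sort order
            return spare.Utf8ToString();
        }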
        public void TestWildcardInBoolean()
        {
            Directory dir = NewDirectory();
            // use simpleanalyzer for more natural tokenization (else "test." is a token)
            Analyzer          analyzer = new MockAnalyzer(Random(), MockTokenizer.SIMPLE, true);
            IndexWriterConfig iwc      = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);

            iwc.SetMergePolicy(NewLogMergePolicy());
            RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, iwc);

            FieldType offsetsType = new FieldType(TextField.TYPE_STORED);

            offsetsType.IndexOptions = (IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
            Field    body = new Field("body", "", offsetsType);
            Document doc  = new Document();

            doc.Add(body);

            body.SetStringValue("This is a test.");
            iw.AddDocument(doc);
            body.SetStringValue("Test a one sentence document.");
            iw.AddDocument(doc);

            IndexReader ir = iw.Reader;

            iw.Dispose();

            IndexSearcher       searcher    = NewSearcher(ir);
            PostingsHighlighter highlighter = new PostingsHighlighterAnalyzerHelper(analyzer);
            //PostingsHighlighter highlighter = new PostingsHighlighter() {
            //      @Override
            //      protected Analyzer getIndexAnalyzer(String field)
            //{
            //    return analyzer;
            //}
            //    };
            BooleanQuery query = new BooleanQuery();

            query.Add(new WildcardQuery(new Term("body", "te*")), Occur.SHOULD);
            TopDocs topDocs = searcher.Search(query, null, 10, Sort.INDEXORDER);

            assertEquals(2, topDocs.TotalHits);
            String[] snippets = highlighter.Highlight("body", query, searcher, topDocs);
            assertEquals(2, snippets.Length);
            assertEquals("This is a <b>test</b>.", snippets[0]);
            assertEquals("<b>Test</b> a one sentence document.", snippets[1]);

            // must not
            query = new BooleanQuery();
            query.Add(new MatchAllDocsQuery(), Occur.SHOULD);
            query.Add(new WildcardQuery(new Term("bogus", "te*")), Occur.MUST_NOT);
            topDocs = searcher.Search(query, null, 10, Sort.INDEXORDER);
            assertEquals(2, topDocs.TotalHits);
            snippets = highlighter.Highlight("body", query, searcher, topDocs);
            assertEquals(2, snippets.Length);
            assertEquals("This is a test.", snippets[0]);
            assertEquals("Test a one sentence document.", snippets[1]);

            ir.Dispose();
            dir.Dispose();
        }
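        // Hedged aside: PostingsHighlighterAnalyzerHelper stands in for the
        // anonymous Java subclass shown in the comment above. A plausible shape,
        // assuming it overrides only GetIndexAnalyzer as that comment indicates:
        internal class PostingsHighlighterAnalyzerHelper : PostingsHighlighter
        {
            private readonly Analyzer analyzer;

            public PostingsHighlighterAnalyzerHelper(Analyzer analyzer)
            {
                this.analyzer = analyzer;
            }

            protected override Analyzer GetIndexAnalyzer(string field)
            {
                return analyzer; // the index-time analyzer enables wildcard/MTQ highlighting
            }
        }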
        public void TestWhichMTQMatched()
        {
            Directory dir = NewDirectory();
            // use simpleanalyzer for more natural tokenization (else "test." is a token)
            Analyzer          analyzer = new MockAnalyzer(Random(), MockTokenizer.SIMPLE, true);
            IndexWriterConfig iwc      = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);

            iwc.SetMergePolicy(NewLogMergePolicy());
            RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, iwc);

            FieldType offsetsType = new FieldType(TextField.TYPE_STORED);

            offsetsType.IndexOptions = (IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
            Field    body = new Field("body", "", offsetsType);
            Document doc  = new Document();

            doc.Add(body);

            body.SetStringValue("Test a one sentence document.");
            iw.AddDocument(doc);

            IndexReader ir = iw.Reader;

            iw.Dispose();

            IndexSearcher       searcher    = NewSearcher(ir);
            PostingsHighlighter highlighter = new PostingsHighlighterAnalyzerHelper(analyzer);
            //PostingsHighlighter highlighter = new PostingsHighlighter() {
            //      @Override
            //      protected Analyzer getIndexAnalyzer(String field)
            //{
            //    return analyzer;
            //}
            //    };
            BooleanQuery query = new BooleanQuery();

            query.Add(new WildcardQuery(new Term("body", "te*")), Occur.SHOULD);
            query.Add(new WildcardQuery(new Term("body", "one")), Occur.SHOULD);
            query.Add(new WildcardQuery(new Term("body", "se*")), Occur.SHOULD);
            TopDocs topDocs = searcher.Search(query, null, 10, Sort.INDEXORDER);

            assertEquals(1, topDocs.TotalHits);
            String[] snippets = highlighter.Highlight("body", query, searcher, topDocs);
            assertEquals(1, snippets.Length);

            // Default formatter just bolds each hit:
            assertEquals("<b>Test</b> a <b>one</b> <b>sentence</b> document.", snippets[0]);

            // Now use our own formatter, that also stuffs the
            // matching term's text into the result:
            highlighter = new PostingsHighlighterAnalyzerAndFormatterHelper(analyzer, new PassageFormatterHelper());

            //highlighter = new PostingsHighlighter()
            //{
            //    @Override
            //      protected Analyzer getIndexAnalyzer(String field)
            //{
            //    return analyzer;
            //}

            //@Override
            //      protected PassageFormatter getFormatter(String field)
            //{
            //    return new PassageFormatter() {

            //          @Override
            //          public Object format(Passage passages[], String content)
            //{
            //    // Copied from DefaultPassageFormatter, but
            //    // tweaked to include the matched term:
            //    StringBuilder sb = new StringBuilder();
            //    int pos = 0;
            //    for (Passage passage : passages)
            //    {
            //        // don't add ellipsis if its the first one, or if its connected.
            //        if (passage.startOffset > pos && pos > 0)
            //        {
            //            sb.append("... ");
            //        }
            //        pos = passage.startOffset;
            //        for (int i = 0; i < passage.numMatches; i++)
            //        {
            //            int start = passage.matchStarts[i];
            //            int end = passage.matchEnds[i];
            //            // its possible to have overlapping terms
            //            if (start > pos)
            //            {
            //                sb.append(content, pos, start);
            //            }
            //            if (end > pos)
            //            {
            //                sb.append("<b>");
            //                sb.append(content, Math.max(pos, start), end);
            //                sb.append('(');
            //                sb.append(passage.getMatchTerms()[i].utf8ToString());
            //                sb.append(')');
            //                sb.append("</b>");
            //                pos = end;
            //            }
            //        }
            //        // its possible a "term" from the analyzer could span a sentence boundary.
            //        sb.append(content, pos, Math.max(pos, passage.endOffset));
            //        pos = passage.endOffset;
            //    }
            //    return sb.toString();
            //}
            //        };
            //      }
            //    };


            assertEquals(1, topDocs.TotalHits);
            snippets = highlighter.Highlight("body", query, searcher, topDocs);
            assertEquals(1, snippets.Length);

            // Default formatter bolds each hit:
            assertEquals("<b>Test(body:te*)</b> a <b>one(body:one)</b> <b>sentence(body:se*)</b> document.", snippets[0]);

            ir.Dispose();
            dir.Dispose();
        }
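        // Hedged aside: the commented-out Java above is the original
        // PassageFormatterHelper. A C# rendering of it (Passage member names are
        // assumed from the Java original; treat this as a sketch, not the port):
        internal class PassageFormatterHelper : PassageFormatter
        {
            public override object Format(Passage[] passages, string content)
            {
                // Copied from DefaultPassageFormatter, but tweaked to include the matched term:
                StringBuilder sb = new StringBuilder();
                int pos = 0;
                foreach (Passage passage in passages)
                {
                    // don't add ellipsis if it's the first one, or if it's connected
                    if (passage.StartOffset > pos && pos > 0)
                    {
                        sb.Append("... ");
                    }
                    pos = passage.StartOffset;
                    for (int i = 0; i < passage.NumMatches; i++)
                    {
                        int start = passage.MatchStarts[i];
                        int end = passage.MatchEnds[i];
                        // it's possible to have overlapping terms
                        if (start > pos)
                        {
                            sb.Append(content, pos, start - pos);
                        }
                        if (end > pos)
                        {
                            sb.Append("<b>");
                            sb.Append(content, Math.Max(pos, start), end - Math.Max(pos, start));
                            sb.Append('(');
                            sb.Append(passage.GetMatchTerms()[i].Utf8ToString());
                            sb.Append(')');
                            sb.Append("</b>");
                            pos = end;
                        }
                    }
                    // it's possible a "term" from the analyzer could span a sentence boundary
                    sb.Append(content, pos, Math.Max(pos, passage.EndOffset) - pos);
                    pos = passage.EndOffset;
                }
                return sb.ToString();
            }
        }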

        public virtual void RunTest(string testName)
        {
            Failed.Set(false);
            AddCount.Set(0);
            DelCount.Set(0);
            PackCount.Set(0);

            DateTime t0 = DateTime.UtcNow;

            Random random = new Random(Random().Next());
            LineFileDocs docs = new LineFileDocs(random, DefaultCodecSupportsDocValues());
            DirectoryInfo tempDir = CreateTempDir(testName);
            Dir = GetDirectory(NewMockFSDirectory(tempDir)); // some subclasses rely on this being MDW
            if (Dir is BaseDirectoryWrapper)
            {
                ((BaseDirectoryWrapper)Dir).CheckIndexOnClose = false; // don't double-checkIndex, we do it ourselves.
            }
            MockAnalyzer analyzer = new MockAnalyzer(Random());
            analyzer.MaxTokenLength = TestUtil.NextInt(Random(), 1, IndexWriter.MAX_TERM_LENGTH);
            IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetInfoStream(new FailOnNonBulkMergesInfoStream());

            if (LuceneTestCase.TEST_NIGHTLY)
            {
                // newIWConfig makes smallish max seg size, which
                // results in tons and tons of segments for this test
                // when run nightly:
                MergePolicy mp = conf.MergePolicy;
                if (mp is TieredMergePolicy)
                {
                    ((TieredMergePolicy)mp).MaxMergedSegmentMB = 5000.0;
                }
                else if (mp is LogByteSizeMergePolicy)
                {
                    ((LogByteSizeMergePolicy)mp).MaxMergeMB = 1000.0;
                }
                else if (mp is LogMergePolicy)
                {
                    ((LogMergePolicy)mp).MaxMergeDocs = 100000;
                }
            }

            conf.SetMergedSegmentWarmer(new IndexReaderWarmerAnonymousInnerClassHelper(this));

            if (VERBOSE)
            {
                conf.InfoStream = new PrintStreamInfoStreamAnonymousInnerClassHelper(this, Console.Out);
            }
            Writer = new IndexWriter(Dir, conf);
            TestUtil.ReduceOpenFiles(Writer);

            TaskScheduler es = Random().NextBoolean() ? null : TaskScheduler.Default;

            DoAfterWriter(es);

            int NUM_INDEX_THREADS = TestUtil.NextInt(Random(), 2, 4);

            int RUN_TIME_SEC = LuceneTestCase.TEST_NIGHTLY ? 300 : RANDOM_MULTIPLIER;

            ISet<string> delIDs = new ConcurrentHashSet<string>(new HashSet<string>());
            ISet<string> delPackIDs = new ConcurrentHashSet<string>(new HashSet<string>());
            IList<SubDocs> allSubDocs = new SynchronizedCollection<SubDocs>();

            DateTime stopTime = DateTime.UtcNow.AddSeconds(RUN_TIME_SEC);

            ThreadClass[] indexThreads = LaunchIndexingThreads(docs, NUM_INDEX_THREADS, stopTime, delIDs, delPackIDs, allSubDocs);

            if (VERBOSE)
            {
                Console.WriteLine("TEST: DONE start " + NUM_INDEX_THREADS + " indexing threads [" + (DateTime.UtcNow - t0).TotalMilliseconds + " ms]");
            }

            // Let index build up a bit
            Thread.Sleep(100);

            DoSearching(es, stopTime);

            if (VERBOSE)
            {
                Console.WriteLine("TEST: all searching done [" + (DateTime.UtcNow - t0).TotalMilliseconds + " ms]");
            }

            for (int thread = 0; thread < indexThreads.Length; thread++)
            {
                indexThreads[thread].Join();
            }

            if (VERBOSE)
            {
                Console.WriteLine("TEST: done join indexing threads [" + (DateTime.UtcNow - t0).TotalMilliseconds + " ms]; addCount=" + AddCount + " delCount=" + DelCount);
            }

            IndexSearcher s = FinalSearcher;
            if (VERBOSE)
            {
                Console.WriteLine("TEST: finalSearcher=" + s);
            }

            Assert.IsFalse(Failed.Get());

            bool doFail = false;

            // Verify: make sure delIDs are in fact deleted:
            foreach (string id in delIDs)
            {
                TopDocs hits = s.Search(new TermQuery(new Term("docid", id)), 1);
                if (hits.TotalHits != 0)
                {
                    Console.WriteLine("doc id=" + id + " is supposed to be deleted, but got " + hits.TotalHits + " hits; first docID=" + hits.ScoreDocs[0].Doc);
                    doFail = true;
                }
            }

            // Verify: make sure delPackIDs are in fact deleted:
            foreach (string id in delPackIDs)
            {
                TopDocs hits = s.Search(new TermQuery(new Term("packID", id)), 1);
                if (hits.TotalHits != 0)
                {
                    Console.WriteLine("packID=" + id + " is supposed to be deleted, but got " + hits.TotalHits + " matches");
                    doFail = true;
                }
            }

            // Verify: make sure each group of sub-docs are still in docID order:
            foreach (SubDocs subDocs in allSubDocs.ToList())
            {
                TopDocs hits = s.Search(new TermQuery(new Term("packID", subDocs.PackID)), 20);
                if (!subDocs.Deleted)
                {
                    // We sort by relevance but the scores should be identical so sort falls back to by docID:
                    if (hits.TotalHits != subDocs.SubIDs.Count)
                    {
                        Console.WriteLine("packID=" + subDocs.PackID + ": expected " + subDocs.SubIDs.Count + " hits but got " + hits.TotalHits);
                        doFail = true;
                    }
                    else
                    {
                        int lastDocID = -1;
                        int startDocID = -1;
                        foreach (ScoreDoc scoreDoc in hits.ScoreDocs)
                        {
                            int docID = scoreDoc.Doc;
                            if (lastDocID != -1)
                            {
                                Assert.AreEqual(1 + lastDocID, docID);
                            }
                            else
                            {
                                startDocID = docID;
                            }
                            lastDocID = docID;
                            Document doc = s.Doc(docID);
                            Assert.AreEqual(subDocs.PackID, doc.Get("packID"));
                        }

                        lastDocID = startDocID - 1;
                        foreach (string subID in subDocs.SubIDs)
                        {
                            hits = s.Search(new TermQuery(new Term("docid", subID)), 1);
                            Assert.AreEqual(1, hits.TotalHits);
                            int docID = hits.ScoreDocs[0].Doc;
                            if (lastDocID != -1)
                            {
                                Assert.AreEqual(1 + lastDocID, docID);
                            }
                            lastDocID = docID;
                        }
                    }
                }
                else
                {
                    // Pack was deleted -- make sure its docs are
                    // deleted.  We can't verify packID is deleted
                    // because we can re-use packID for update:
                    foreach (string subID in subDocs.SubIDs)
                    {
                        Assert.AreEqual(0, s.Search(new TermQuery(new Term("docid", subID)), 1).TotalHits);
                    }
                }
            }

            // Verify: make sure all not-deleted docs are in fact
            // not deleted:
            int endID = Convert.ToInt32(docs.NextDoc().Get("docid"));
            docs.Dispose();

            for (int id = 0; id < endID; id++)
            {
                string stringID = "" + id;
                if (!delIDs.Contains(stringID))
                {
                    TopDocs hits = s.Search(new TermQuery(new Term("docid", stringID)), 1);
                    if (hits.TotalHits != 1)
                    {
                        Console.WriteLine("doc id=" + stringID + " is not supposed to be deleted, but got hitCount=" + hits.TotalHits + "; delIDs=" + string.Join(",",  delIDs.ToArray()));
                        doFail = true;
                    }
                }
            }
            Assert.IsFalse(doFail);

            Assert.AreEqual(AddCount.Get() - DelCount.Get(), s.IndexReader.NumDocs, "index=" + Writer.SegString() + " addCount=" + AddCount + " delCount=" + DelCount);
            ReleaseSearcher(s);

            Writer.Commit();

            Assert.AreEqual(AddCount.Get() - DelCount.Get(), Writer.NumDocs(), "index=" + Writer.SegString() + " addCount=" + AddCount + " delCount=" + DelCount);

            DoClose();
            Writer.Dispose(false);

            // Cannot shutdown until after writer is closed because
            // writer has merged segment warmer that uses IS to run
            // searches, and that IS may be using this es!
            /*if (es != null)
            {
              es.shutdown();
              es.awaitTermination(1, TimeUnit.SECONDS);
            }*/

            TestUtil.CheckIndex(Dir);
            Dir.Dispose();
            System.IO.Directory.Delete(tempDir.FullName, true);

            if (VERBOSE)
            {
                Console.WriteLine("TEST: done [" + (DateTime.UtcNow - t0).TotalMilliseconds + " ms]");
            }
        }
Example #34
        public void TestMinShouldMatch()
        {
            Directory         dir      = NewDirectory();
            MockAnalyzer      analyzer = new MockAnalyzer(Random);
            RandomIndexWriter w        = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, dir, analyzer);

            string[] docs = new string[]
            {
                @"this is the end of the world right",
                @"is this it or maybe not",
                @"this is the end of the universe as we know it",
                @"there is the famous restaurant at the end of the universe"
            };

            for (int i = 0; i < docs.Length; i++)
            {
                Document doc = new Document();
                doc.Add(NewStringField(@"id", @"" + i, Field.Store.YES));
                doc.Add(NewTextField(@"field", docs[i], Field.Store.NO));
                w.AddDocument(doc);
            }

            IndexReader   r = w.GetReader();
            IndexSearcher s = NewSearcher(r);

            {
                CommonTermsQuery query = new CommonTermsQuery(Occur.SHOULD, Occur.SHOULD,
                                                              Random.NextBoolean() ? 2.0f : 0.5f);
                query.Add(new Term("field", "is"));
                query.Add(new Term("field", "this"));
                query.Add(new Term("field", "end"));
                query.Add(new Term("field", "world"));
                query.Add(new Term("field", "universe"));
                query.Add(new Term("field", "right"));
                query.LowFreqMinimumNumberShouldMatch = 0.5f;
                TopDocs search = s.Search(query, 10);
                assertEquals(search.TotalHits, 1);
                assertEquals(@"0", r.Document(search.ScoreDocs[0].Doc).Get(@"id"));
            }

            {
                CommonTermsQuery query = new CommonTermsQuery(Occur.SHOULD, Occur.SHOULD,
                                                              Random.NextBoolean() ? 2.0f : 0.5f);
                query.Add(new Term("field", "is"));
                query.Add(new Term("field", "this"));
                query.Add(new Term("field", "end"));
                query.Add(new Term("field", "world"));
                query.Add(new Term("field", "universe"));
                query.Add(new Term("field", "right"));
                query.LowFreqMinimumNumberShouldMatch = 2.0f;
                TopDocs search = s.Search(query, 10);
                assertEquals(search.TotalHits, 1);
                assertEquals(@"0", r.Document(search.ScoreDocs[0].Doc).Get(@"id"));
            }

            {
                CommonTermsQuery query = new CommonTermsQuery(Occur.SHOULD, Occur.SHOULD,
                                                              Random.NextBoolean() ? 2.0f : 0.5f);
                query.Add(new Term("field", "is"));
                query.Add(new Term("field", "this"));
                query.Add(new Term("field", "end"));
                query.Add(new Term("field", "world"));
                query.Add(new Term("field", "universe"));
                query.Add(new Term("field", "right"));
                query.LowFreqMinimumNumberShouldMatch = 0.49f;
                TopDocs search = s.Search(query, 10);
                assertEquals(search.TotalHits, 3);
                assertEquals(@"0", r.Document(search.ScoreDocs[0].Doc).Get(@"id"));
                assertEquals(@"2", r.Document(search.ScoreDocs[1].Doc).Get(@"id"));
                assertEquals(@"3", r.Document(search.ScoreDocs[2].Doc).Get(@"id"));
            }

            {
                CommonTermsQuery query = new CommonTermsQuery(Occur.SHOULD, Occur.SHOULD,
                                                              Random.NextBoolean() ? 2.0f : 0.5f);
                query.Add(new Term("field", "is"));
                query.Add(new Term("field", "this"));
                query.Add(new Term("field", "end"));
                query.Add(new Term("field", "world"));
                query.Add(new Term("field", "universe"));
                query.Add(new Term("field", "right"));
                query.LowFreqMinimumNumberShouldMatch = 1.0f;
                TopDocs search = s.Search(query, 10);
                assertEquals(search.TotalHits, 3);
                assertEquals(@"0", r.Document(search.ScoreDocs[0].Doc).Get(@"id"));
                assertEquals(@"2", r.Document(search.ScoreDocs[1].Doc).Get(@"id"));
                assertEquals(@"3", r.Document(search.ScoreDocs[2].Doc).Get(@"id"));
                assertTrue(search.ScoreDocs[1].Score > search.ScoreDocs[2].Score);
            }

            {
                CommonTermsQuery query = new CommonTermsQuery(Occur.SHOULD, Occur.SHOULD,
                                                              Random.NextBoolean() ? 2.0f : 0.5f);
                query.Add(new Term("field", "is"));
                query.Add(new Term("field", "this"));
                query.Add(new Term("field", "end"));
                query.Add(new Term("field", "world"));
                query.Add(new Term("field", "universe"));
                query.Add(new Term("field", "right"));
                query.LowFreqMinimumNumberShouldMatch  = 1.0f;
                query.HighFreqMinimumNumberShouldMatch = 4.0f;
                TopDocs search = s.Search(query, 10);
                assertEquals(search.TotalHits, 3);
                assertEquals(search.ScoreDocs[1].Score, search.ScoreDocs[2].Score, 0.0f);
                assertEquals(@"0", r.Document(search.ScoreDocs[0].Doc).Get(@"id"));
                // doc 2 and 3 only get a score from low freq terms
                assertEquals(
                    new JCG.HashSet <string> {
                    @"2", @"3"
                },
                    new JCG.HashSet <string> {
                    r.Document(search.ScoreDocs[1].Doc).Get(@"id"),
                    r.Document(search.ScoreDocs[2].Doc).Get(@"id")
                },
                    aggressive: false);
            }

            {
                // only high freq terms around - check that min should match is applied
                CommonTermsQuery query = new CommonTermsQuery(Occur.SHOULD, Occur.SHOULD,
                                                              Random.NextBoolean() ? 2.0f : 0.5f);
                query.Add(new Term("field", "is"));
                query.Add(new Term("field", "this"));
                query.Add(new Term("field", "the"));
                query.LowFreqMinimumNumberShouldMatch  = 1.0f;
                query.HighFreqMinimumNumberShouldMatch = 2.0f;
                TopDocs search = s.Search(query, 10);
                assertEquals(search.TotalHits, 4);
            }

            {
                // only high freq terms around - check that min should match is applied
                CommonTermsQuery query = new CommonTermsQuery(Occur.MUST, Occur.SHOULD,
                                                              Random.NextBoolean() ? 2.0f : 0.5f);
                query.Add(new Term("field", "is"));
                query.Add(new Term("field", "this"));
                query.Add(new Term("field", "the"));
                query.LowFreqMinimumNumberShouldMatch  = 1.0f;
                query.HighFreqMinimumNumberShouldMatch = 2.0f;
                TopDocs search = s.Search(query, 10);
                assertEquals(search.TotalHits, 2);
                assertEquals(
                    new JCG.HashSet <string> {
                    @"0", @"2"
                },
                    new JCG.HashSet <string> {
                    r.Document(search.ScoreDocs[0].Doc).Get(@"id"),
                    r.Document(search.ScoreDocs[1].Doc).Get(@"id")
                },
                    aggressive: false);
            }

            r.Dispose();
            w.Dispose();
            dir.Dispose();
        }
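        // Hedged aside: the Low/HighFreqMinimumNumberShouldMatch values above
        // follow CommonTermsQuery's rule (per the Lucene sources): a value in
        // (0, 1) acts as a fraction of the optional clauses, anything else as an
        // absolute count. A sketch of that computation (the helper name is ours):
        private static int EffectiveMinShouldMatch(float spec, int numOptionalClauses)
        {
            if (spec >= 1.0f || spec == 0.0f)
            {
                return (int)spec; // absolute count
            }
            return (int)(spec * numOptionalClauses + 0.5f); // fraction, rounded half-up like Java
        }
        // e.g. 0.5f over 6 optional low-freq clauses requires 3 of them to match.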
Example #35
        public void TestRandomMinPrefixLength()
        {
            Input[] keys = new Input[] {
                new Input("lend me your ear", 8, new BytesRef("foobar")),
                new Input("a penny saved is a penny earned", 10, new BytesRef("foobaz")),
            };
            DirectoryInfo tempDir = CreateTempDir("AnalyzingInfixSuggesterTest");

            Analyzer a = new MockAnalyzer(Random(), MockTokenizer.WHITESPACE, false);
            int      minPrefixLength          = Random().Next(10);
            AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, NewFSDirectory(tempDir), a, a, minPrefixLength);

            suggester.Build(new InputArrayIterator(keys));

            for (int i = 0; i < 2; i++)
            {
                for (int j = 0; j < 2; j++)
                {
                    bool doHighlight = j == 0;

                    IList <Lookup.LookupResult> results = suggester.DoLookup(TestUtil.StringToCharSequence("ear", Random()).ToString(), 10, true, doHighlight);
                    assertEquals(2, results.size());
                    if (doHighlight)
                    {
                        assertEquals("a penny saved is a penny <b>ear</b>ned", results[0].key);
                    }
                    else
                    {
                        assertEquals("a penny saved is a penny earned", results[0].key);
                    }
                    assertEquals(10, results[0].value);
                    if (doHighlight)
                    {
                        assertEquals("lend me your <b>ear</b>", results[1].key);
                    }
                    else
                    {
                        assertEquals("lend me your ear", results[1].key);
                    }
                    assertEquals(new BytesRef("foobaz"), results[0].payload);
                    assertEquals(8, results[1].value);
                    assertEquals(new BytesRef("foobar"), results[1].payload);

                    results = suggester.DoLookup(TestUtil.StringToCharSequence("ear ", Random()).ToString(), 10, true, doHighlight);
                    assertEquals(1, results.size());
                    if (doHighlight)
                    {
                        assertEquals("lend me your <b>ear</b>", results[0].key);
                    }
                    else
                    {
                        assertEquals("lend me your ear", results[0].key);
                    }
                    assertEquals(8, results[0].value);
                    assertEquals(new BytesRef("foobar"), results[0].payload);

                    results = suggester.DoLookup(TestUtil.StringToCharSequence("pen", Random()).ToString(), 10, true, doHighlight);
                    assertEquals(1, results.size());
                    if (doHighlight)
                    {
                        assertEquals("a <b>pen</b>ny saved is a <b>pen</b>ny earned", results[0].key);
                    }
                    else
                    {
                        assertEquals("a penny saved is a penny earned", results[0].key);
                    }
                    assertEquals(10, results[0].value);
                    assertEquals(new BytesRef("foobaz"), results[0].payload);

                    results = suggester.DoLookup(TestUtil.StringToCharSequence("p", Random()).ToString(), 10, true, doHighlight);
                    assertEquals(1, results.size());
                    if (doHighlight)
                    {
                        assertEquals("a <b>p</b>enny saved is a <b>p</b>enny earned", results[0].key);
                    }
                    else
                    {
                        assertEquals("a penny saved is a penny earned", results[0].key);
                    }
                    assertEquals(10, results[0].value);
                    assertEquals(new BytesRef("foobaz"), results[0].payload);
                }

                // Make sure things still work after close and reopen:
                suggester.Dispose();
                suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, NewFSDirectory(tempDir), a, a, minPrefixLength);
            }
            suggester.Dispose();
        }
Example #36
        public void TestMinShouldMatch()
        {
            Directory         dir      = NewDirectory();
            MockAnalyzer      analyzer = new MockAnalyzer(Random());
            RandomIndexWriter w        = new RandomIndexWriter(Random(), dir, analyzer, Similarity, TimeZone);

            string[] docs = new string[]
            {
                @"this is the end of the world right", @"is this it or maybe not",
                @"this is the end of the universe as we know it",
                @"there is the famous restaurant at the end of the universe"
            };

            for (int i = 0; i < docs.Length; i++)
            {
                Document doc = new Document();
                doc.Add(NewStringField(@"id", @"" + i, Field.Store.YES));
                doc.Add(NewTextField(@"field", docs[i], Field.Store.NO));
                w.AddDocument(doc);
            }

            IndexReader   r = w.Reader;
            IndexSearcher s = NewSearcher(r);

            {
                CommonTermsQuery query = new CommonTermsQuery(Occur.SHOULD, Occur.SHOULD, Random().NextBoolean() ? 2F : 0.5F);
                query.Add(new Term("field", "is"));
                query.Add(new Term("field", "this"));
                query.Add(new Term("field", "end"));
                query.Add(new Term("field", "world"));
                query.Add(new Term("field", "universe"));
                query.Add(new Term("field", "right"));
                query.LowFreqMinimumNumberShouldMatch = 0.5F;
                TopDocs search = s.Search(query, 10);
                assertEquals(search.TotalHits, 1);
                assertEquals(@"0", r.Document(search.ScoreDocs[0].Doc).Get(@"id"));
            }

            {
                CommonTermsQuery query = new CommonTermsQuery(Occur.SHOULD, Occur.SHOULD, Random().NextBoolean() ? 2F : 0.5F);
                query.Add(new Term("field", "is"));
                query.Add(new Term("field", "this"));
                query.Add(new Term("field", "end"));
                query.Add(new Term("field", "world"));
                query.Add(new Term("field", "universe"));
                query.Add(new Term("field", "right"));
                query.LowFreqMinimumNumberShouldMatch = 2F;
                TopDocs search = s.Search(query, 10);
                assertEquals(search.TotalHits, 1);
                assertEquals(@"0", r.Document(search.ScoreDocs[0].Doc).Get(@"id"));
            }

            {
                CommonTermsQuery query = new CommonTermsQuery(Occur.SHOULD, Occur.SHOULD, Random().NextBoolean() ? 2F : 0.5F);
                query.Add(new Term("field", "is"));
                query.Add(new Term("field", "this"));
                query.Add(new Term("field", "end"));
                query.Add(new Term("field", "world"));
                query.Add(new Term("field", "universe"));
                query.Add(new Term("field", "right"));
                query.LowFreqMinimumNumberShouldMatch = 0.49F;
                TopDocs search = s.Search(query, 10);
                assertEquals(search.TotalHits, 3);
                assertEquals(@"0", r.Document(search.ScoreDocs[0].Doc).Get(@"id"));
                assertEquals(@"2", r.Document(search.ScoreDocs[1].Doc).Get(@"id"));
                assertEquals(@"3", r.Document(search.ScoreDocs[2].Doc).Get(@"id"));
            }

            {
                CommonTermsQuery query = new CommonTermsQuery(Occur.SHOULD, Occur.SHOULD, Random().NextBoolean() ? 2F : 0.5F);
                query.Add(new Term("field", "is"));
                query.Add(new Term("field", "this"));
                query.Add(new Term("field", "end"));
                query.Add(new Term("field", "world"));
                query.Add(new Term("field", "universe"));
                query.Add(new Term("field", "right"));
                query.LowFreqMinimumNumberShouldMatch = 1F;
                TopDocs search = s.Search(query, 10);
                assertEquals(search.TotalHits, 3);
                assertEquals(@"0", r.Document(search.ScoreDocs[0].Doc).Get(@"id"));
                assertEquals(@"2", r.Document(search.ScoreDocs[1].Doc).Get(@"id"));
                assertEquals(@"3", r.Document(search.ScoreDocs[2].Doc).Get(@"id"));
                assertTrue(search.ScoreDocs[1].Score > search.ScoreDocs[2].Score);
            }

            {
                CommonTermsQuery query = new CommonTermsQuery(Occur.SHOULD, Occur.SHOULD, Random().NextBoolean() ? 2F : 0.5F);
                query.Add(new Term("field", "is"));
                query.Add(new Term("field", "this"));
                query.Add(new Term("field", "end"));
                query.Add(new Term("field", "world"));
                query.Add(new Term("field", "universe"));
                query.Add(new Term("field", "right"));
                query.LowFreqMinimumNumberShouldMatch  = 1F;
                query.HighFreqMinimumNumberShouldMatch = 4F;
                TopDocs search = s.Search(query, 10);
                assertEquals(search.TotalHits, 3);
                assertEquals(search.ScoreDocs[1].Score, search.ScoreDocs[2].Score, 0F);
                assertEquals(@"0", r.Document(search.ScoreDocs[0].Doc).Get(@"id"));
                assertEquals(new HashSet <string>(Arrays.AsList(@"2", @"3")), new HashSet <string>(Arrays.AsList(r.Document(search.ScoreDocs[1].Doc).Get(@"id"), r.Document(search.ScoreDocs[2].Doc).Get(@"id"))));
            }

            {
                CommonTermsQuery query = new CommonTermsQuery(Occur.SHOULD, Occur.SHOULD, Random().NextBoolean() ? 2F : 0.5F);
                query.Add(new Term("field", "is"));
                query.Add(new Term("field", "this"));
                query.Add(new Term("field", "the"));
                query.LowFreqMinimumNumberShouldMatch  = 1F;
                query.HighFreqMinimumNumberShouldMatch = 2F;
                TopDocs search = s.Search(query, 10);
                assertEquals(search.TotalHits, 4);
            }

            {
                CommonTermsQuery query = new CommonTermsQuery(Occur.MUST, Occur.SHOULD, Random().NextBoolean() ? 2F : 0.5F);
                query.Add(new Term("field", "is"));
                query.Add(new Term("field", "this"));
                query.Add(new Term("field", "the"));
                query.LowFreqMinimumNumberShouldMatch  = 1F;
                query.HighFreqMinimumNumberShouldMatch = 2F;
                TopDocs search = s.Search(query, 10);
                assertEquals(search.TotalHits, 2);
                assertEquals(new HashSet <string>(Arrays.AsList(@"0", @"2")), new HashSet <string>(Arrays.AsList(r.Document(search.ScoreDocs[0].Doc).Get(@"id"), r.Document(search.ScoreDocs[1].Doc).Get(@"id"))));
            }

            r.Dispose();
            w.Dispose();
            dir.Dispose();
        }
Example #37
        public void TestBasicNRT()
        {
            Input[] keys = new Input[] {
                new Input("lend me your ear", 8, new BytesRef("foobar")),
            };

            Analyzer a = new MockAnalyzer(Random(), MockTokenizer.WHITESPACE, false);
            AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, NewDirectory(), a, a, 3);

            suggester.Build(new InputArrayIterator(keys));

            IList <Lookup.LookupResult> results = suggester.DoLookup(TestUtil.StringToCharSequence("ear", Random()).ToString(), 10, true, true);

            assertEquals(1, results.size());
            assertEquals("lend me your <b>ear</b>", results.ElementAt(0).key);
            assertEquals(8, results.ElementAt(0).value);
            assertEquals(new BytesRef("foobar"), results.ElementAt(0).payload);

            // Add a new suggestion:
            suggester.Add(new BytesRef("a penny saved is a penny earned"), null, 10, new BytesRef("foobaz"));

            // Must refresh to see any newly added suggestions:
            suggester.Refresh();

            results = suggester.DoLookup(TestUtil.StringToCharSequence("ear", Random()).ToString(), 10, true, true);
            assertEquals(2, results.size());
            assertEquals("a penny saved is a penny <b>ear</b>ned", results.ElementAt(0).key);
            assertEquals(10, results.ElementAt(0).value);
            assertEquals(new BytesRef("foobaz"), results.ElementAt(0).payload);

            assertEquals("lend me your <b>ear</b>", results.ElementAt(1).key);
            assertEquals(8, results.ElementAt(1).value);
            assertEquals(new BytesRef("foobar"), results.ElementAt(1).payload);

            results = suggester.DoLookup(TestUtil.StringToCharSequence("ear ", Random()).ToString(), 10, true, true);
            assertEquals(1, results.size());
            assertEquals("lend me your <b>ear</b>", results.ElementAt(0).key);
            assertEquals(8, results.ElementAt(0).value);
            assertEquals(new BytesRef("foobar"), results.ElementAt(0).payload);

            results = suggester.DoLookup(TestUtil.StringToCharSequence("pen", Random()).ToString(), 10, true, true);
            assertEquals(1, results.size());
            assertEquals("a <b>pen</b>ny saved is a <b>pen</b>ny earned", results.ElementAt(0).key);
            assertEquals(10, results.ElementAt(0).value);
            assertEquals(new BytesRef("foobaz"), results.ElementAt(0).payload);

            results = suggester.DoLookup(TestUtil.StringToCharSequence("p", Random()).ToString(), 10, true, true);
            assertEquals(1, results.size());
            assertEquals("a <b>p</b>enny saved is a <b>p</b>enny earned", results.ElementAt(0).key);
            assertEquals(10, results.ElementAt(0).value);
            assertEquals(new BytesRef("foobaz"), results.ElementAt(0).payload);

            // Change the weight:
            suggester.Update(new BytesRef("lend me your ear"), null, 12, new BytesRef("foobox"));

            // Must refresh to see any newly added suggestions:
            suggester.Refresh();

            results = suggester.DoLookup(TestUtil.StringToCharSequence("ear", Random()).ToString(), 10, true, true);
            assertEquals(2, results.size());
            assertEquals("lend me your <b>ear</b>", results.ElementAt(0).key);
            assertEquals(12, results.ElementAt(0).value);
            assertEquals(new BytesRef("foobox"), results.ElementAt(0).payload);
            assertEquals("a penny saved is a penny <b>ear</b>ned", results.ElementAt(1).key);
            assertEquals(10, results.ElementAt(1).value);
            assertEquals(new BytesRef("foobaz"), results.ElementAt(1).payload);
            suggester.Dispose();
        }
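        // Hedged aside: the general NRT cycle the test exercises -- Add/Update
        // mutate the suggester's index, but DoLookup only observes them after
        // Refresh(). A condensed sketch using the same 4.8 API (helper is ours):
        private static void UpsertSuggestion(AnalyzingInfixSuggester suggester, string text, long weight, BytesRef payload, bool isNew)
        {
            BytesRef key = new BytesRef(text);
            if (isNew)
            {
                suggester.Add(key, null, weight, payload);
            }
            else
            {
                suggester.Update(key, null, weight, payload);
            }
            suggester.Refresh(); // lookups see the change only after this
        }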
        // [Test] // LUCENENET NOTE: For now, we are overriding this test in every subclass to pull it into the right context for the subclass
        public virtual void TestEmptyBytes()
        {
            Analyzer analyzer = new MockAnalyzer(Random());

            Directory directory = NewDirectory();
            IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
            conf.SetMergePolicy(NewLogMergePolicy());
            RandomIndexWriter iwriter = new RandomIndexWriter(Random(), directory, conf);
            Document doc = new Document();
            doc.Add(new BinaryDocValuesField("dv", new BytesRef("")));
            iwriter.AddDocument(doc);
            doc = new Document();
            doc.Add(new BinaryDocValuesField("dv", new BytesRef("")));
            iwriter.AddDocument(doc);
            iwriter.ForceMerge(1);
            iwriter.Dispose();

            // Now search the index:
            IndexReader ireader = DirectoryReader.Open(directory); // read-only=true
            Debug.Assert(ireader.Leaves.Count == 1);
            BinaryDocValues dv = ((AtomicReader)ireader.Leaves[0].Reader).GetBinaryDocValues("dv");
            BytesRef scratch = new BytesRef();
            dv.Get(0, scratch);
            Assert.AreEqual("", scratch.Utf8ToString());
            dv.Get(1, scratch);
            Assert.AreEqual("", scratch.Utf8ToString());

            ireader.Dispose();
            directory.Dispose();
        }
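        // Illustrative sketch (added): BinaryDocValues.Get fills a caller-supplied
        // BytesRef instead of allocating, which is why the test above reuses "scratch".
        // The helper name is ours; the API calls mirror the test.
        private static string ReadBinaryValue(BinaryDocValues dv, int docId)
        {
            BytesRef scratch = new BytesRef();
            dv.Get(docId, scratch); // scratch now points at this document's bytes
            return scratch.Utf8ToString();
        }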
Example #39
        public virtual void TestRollingUpdates_Mem()
        {
            Random random = new Random(Random().Next());
            BaseDirectoryWrapper dir = NewDirectory();
            LineFileDocs docs = new LineFileDocs(random, DefaultCodecSupportsDocValues());

            //provider.register(new MemoryCodec());
            if ((!"Lucene3x".Equals(Codec.Default.Name)) && Random().NextBoolean())
            {
                Codec.Default =
                    TestUtil.AlwaysPostingsFormat(new MemoryPostingsFormat(Random().NextBoolean(), random.NextFloat()));
            }

            MockAnalyzer analyzer = new MockAnalyzer(Random());
            analyzer.MaxTokenLength = TestUtil.NextInt(Random(), 1, IndexWriter.MAX_TERM_LENGTH);

            IndexWriter w = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
            int SIZE = AtLeast(20);
            int id = 0;
            IndexReader r = null;
            IndexSearcher s = null;
            int numUpdates = (int)(SIZE * (2 + (TEST_NIGHTLY ? 200 * Random().NextDouble() : 5 * Random().NextDouble())));
            if (VERBOSE)
            {
                Console.WriteLine("TEST: numUpdates=" + numUpdates);
            }
            int updateCount = 0;
            // TODO: sometimes update ids not in order...
            for (int docIter = 0; docIter < numUpdates; docIter++)
            {
                Documents.Document doc = docs.NextDoc();
                string myID = "" + id;
                if (id == SIZE - 1)
                {
                    id = 0;
                }
                else
                {
                    id++;
                }
                if (VERBOSE)
                {
                    Console.WriteLine("  docIter=" + docIter + " id=" + id);
                }
                ((Field)doc.GetField("docid")).StringValue = myID;

                Term idTerm = new Term("docid", myID);

                bool doUpdate;
                if (s != null && updateCount < SIZE)
                {
                    TopDocs hits = s.Search(new TermQuery(idTerm), 1);
                    Assert.AreEqual(1, hits.TotalHits);
                    doUpdate = !w.TryDeleteDocument(r, hits.ScoreDocs[0].Doc);
                    if (VERBOSE)
                    {
                        if (doUpdate)
                        {
                            Console.WriteLine("  tryDeleteDocument failed");
                        }
                        else
                        {
                            Console.WriteLine("  tryDeleteDocument succeeded");
                        }
                    }
                }
                else
                {
                    doUpdate = true;
                    if (VERBOSE)
                    {
                        Console.WriteLine("  no searcher: doUpdate=true");
                    }
                }

                updateCount++;

                if (doUpdate)
                {
                    w.UpdateDocument(idTerm, doc);
                }
                else
                {
                    w.AddDocument(doc);
                }

                if (docIter >= SIZE && Random().Next(50) == 17)
                {
                    if (r != null)
                    {
                        r.Dispose();
                    }

                    bool applyDeletions = Random().NextBoolean();

                    if (VERBOSE)
                    {
                        Console.WriteLine("TEST: reopen applyDeletions=" + applyDeletions);
                    }

                    r = w.GetReader(applyDeletions);
                    if (applyDeletions)
                    {
                        s = NewSearcher(r);
                    }
                    else
                    {
                        s = null;
                    }
                    Assert.IsTrue(!applyDeletions || r.NumDocs == SIZE, "applyDeletions=" + applyDeletions + " r.NumDocs=" + r.NumDocs + " vs SIZE=" + SIZE);
                    updateCount = 0;
                }
            }

            if (r != null)
            {
                r.Dispose();
            }

            w.Commit();
            Assert.AreEqual(SIZE, w.NumDocs());

            w.Dispose();

            TestIndexWriter.AssertNoUnreferencedFiles(dir, "leftover files after rolling updates");

            docs.Dispose();

            // LUCENE-4455:
            SegmentInfos infos = new SegmentInfos();
            infos.Read(dir);
            long totalBytes = 0;
            foreach (SegmentCommitInfo sipc in infos.Segments)
            {
                totalBytes += sipc.SizeInBytes();
            }
            long totalBytes2 = 0;
            foreach (string fileName in dir.ListAll())
            {
                if (!fileName.StartsWith(IndexFileNames.SEGMENTS))
                {
                    totalBytes2 += dir.FileLength(fileName);
                }
            }
            Assert.AreEqual(totalBytes2, totalBytes);
            dir.Dispose();
        }
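        // Illustrative sketch (added): the core of the rolling-updates loop above.
        // UpdateDocument is an atomic delete-then-add keyed on the term, so repeatedly
        // updating the same "docid" keeps the live document count constant at SIZE.
        // The helper name is ours.
        private static void UpdateById(IndexWriter w, string id, Documents.Document doc)
        {
            w.UpdateDocument(new Term("docid", id), doc); // delete matching docs, then add
        }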
        // [Test] // LUCENENET NOTE: For now, we are overriding this test in every subclass to pull it into the right context for the subclass
        public virtual void TestSortedBytesTwoDocumentsMerged()
        {
            Analyzer analyzer = new MockAnalyzer(Random());

            Directory directory = NewDirectory();
            IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
            conf.SetMergePolicy(NewLogMergePolicy());
            RandomIndexWriter iwriter = new RandomIndexWriter(Random(), directory, conf);
            Document doc = new Document();
            doc.Add(NewField("id", "0", StringField.TYPE_STORED));
            doc.Add(new SortedDocValuesField("dv", new BytesRef("hello world 1")));
            iwriter.AddDocument(doc);
            iwriter.Commit();
            doc = new Document();
            doc.Add(NewField("id", "1", StringField.TYPE_STORED));
            doc.Add(new SortedDocValuesField("dv", new BytesRef("hello world 2")));
            iwriter.AddDocument(doc);
            iwriter.ForceMerge(1);
            iwriter.Dispose();

            // Now search the index:
            IndexReader ireader = DirectoryReader.Open(directory); // read-only=true
            Debug.Assert(ireader.Leaves.Count == 1);
            SortedDocValues dv = ((AtomicReader)ireader.Leaves[0].Reader).GetSortedDocValues("dv");
            Assert.AreEqual(2, dv.ValueCount); // 2 ords
            BytesRef scratch = new BytesRef();
            dv.LookupOrd(0, scratch);
            Assert.AreEqual(new BytesRef("hello world 1"), scratch);
            dv.LookupOrd(1, scratch);
            Assert.AreEqual(new BytesRef("hello world 2"), scratch);
            for (int i = 0; i < 2; i++)
            {
                Document doc2 = ((AtomicReader)ireader.Leaves[0].Reader).Document(i);
                string expected;
                if (doc2.Get("id").Equals("0"))
                {
                    expected = "hello world 1";
                }
                else
                {
                    expected = "hello world 2";
                }
                dv.LookupOrd(dv.GetOrd(i), scratch);
                Assert.AreEqual(expected, scratch.Utf8ToString());
            }

            ireader.Dispose();
            directory.Dispose();
        }
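        // Illustrative sketch (added): the ord-based read pattern used above. GetOrd
        // maps a docID to an ordinal and LookupOrd maps that ordinal back to the term
        // bytes; ordinals follow the sort order of the distinct values. Helper name is ours.
        private static string ReadSortedValue(SortedDocValues dv, int docId)
        {
            BytesRef scratch = new BytesRef();
            dv.LookupOrd(dv.GetOrd(docId), scratch);
            return scratch.Utf8ToString();
        }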
Example #41
        public void TestRandom()
        {
            string[]      terms = new string[TestUtil.NextInt32(Random, 2, 10)];
            ISet<string> seen  = new HashSet<string>();

            while (seen.size() < terms.Length)
            {
                string token = TestUtil.RandomSimpleString(Random, 1, 5);
                if (!seen.contains(token))
                {
                    terms[seen.size()] = token;
                    seen.add(token);
                }
            }

            Analyzer a = new MockAnalyzer(Random);

            int  numDocs   = AtLeast(10);
            long totTokens = 0;

            string[][] docs = new string[numDocs][];
            for (int i = 0; i < numDocs; i++)
            {
                docs[i] = new string[AtLeast(100)];
                if (VERBOSE)
                {
                    Console.Write("  doc " + i + ":");
                }
                for (int j = 0; j < docs[i].Length; j++)
                {
                    docs[i][j] = GetZipfToken(terms);
                    if (VERBOSE)
                    {
                        Console.Write(" " + docs[i][j]);
                    }
                }
                if (VERBOSE)
                {
                    Console.WriteLine();
                }
                totTokens += docs[i].Length;
            }

            int grams = TestUtil.NextInt32(Random, 1, 4);

            if (VERBOSE)
            {
                Console.WriteLine("TEST: " + terms.Length + " terms; " + numDocs + " docs; " + grams + " grams");
            }

            // Build suggester model:
            FreeTextSuggester sug = new FreeTextSuggester(a, a, grams, (byte)0x20);

            sug.Build(new TestRandomInputIterator(this, docs));

            // Build inefficient but hopefully correct model:
            List<IDictionary<string, int?>> gramCounts = new List<IDictionary<string, int?>>(grams);

            for (int gram = 0; gram < grams; gram++)
            {
                if (VERBOSE)
                {
                    Console.WriteLine("TEST: build model for gram=" + gram);
                }
                IDictionary<string, int?> model = new HashMap<string, int?>();
                gramCounts.Add(model);
                foreach (string[] doc in docs)
                {
                    for (int i = 0; i < doc.Length - gram; i++)
                    {
                        StringBuilder b = new StringBuilder();
                        for (int j = i; j <= i + gram; j++)
                        {
                            if (j > i)
                            {
                                b.append(' ');
                            }
                            b.append(doc[j]);
                        }
                        string token    = b.toString();
                        int?   curCount = model.ContainsKey(token) ? model[token] : null;
                        if (curCount == null)
                        {
                            model.Put(token, 1);
                        }
                        else
                        {
                            model.Put(token, 1 + curCount);
                        }
                        if (VERBOSE)
                        {
                            Console.WriteLine("  add '" + token + "' -> count=" + (model.ContainsKey(token) ? model[token].ToString() : ""));
                        }
                    }
                }
            }

            int lookups = AtLeast(100);

            for (int iter = 0; iter < lookups; iter++)
            {
                string[] tokens = new string[TestUtil.NextInt32(Random, 1, 5)];
                for (int i = 0; i < tokens.Length; i++)
                {
                    tokens[i] = GetZipfToken(terms);
                }

                // Maybe trim last token; be sure not to create the
                // empty string:
                int trimStart;
                if (tokens.Length == 1)
                {
                    trimStart = 1;
                }
                else
                {
                    trimStart = 0;
                }
                int trimAt = TestUtil.NextInt32(Random, trimStart, tokens[tokens.Length - 1].Length);
                tokens[tokens.Length - 1] = tokens[tokens.Length - 1].Substring(0, trimAt);

                int           num = TestUtil.NextInt32(Random, 1, 100);
                StringBuilder b   = new StringBuilder();
                foreach (string token in tokens)
                {
                    b.append(' ');
                    b.append(token);
                }
                string query = b.toString();
                query = query.Substring(1);

                if (VERBOSE)
                {
                    Console.WriteLine("\nTEST: iter=" + iter + " query='" + query + "' num=" + num);
                }

                // Expected:
                List<Lookup.LookupResult> expected = new List<Lookup.LookupResult>();
                double backoff = 1.0;
                seen = new HashSet<string>();

                if (VERBOSE)
                {
                    Console.WriteLine("  compute expected");
                }
                for (int i = grams - 1; i >= 0; i--)
                {
                    if (VERBOSE)
                    {
                        Console.WriteLine("    grams=" + i);
                    }

                    if (tokens.Length < i + 1)
                    {
                        // Don't have enough tokens to use this model
                        if (VERBOSE)
                        {
                            Console.WriteLine("      skip");
                        }
                        continue;
                    }

                    if (i == 0 && tokens[tokens.Length - 1].Length == 0)
                    {
                        // Never suggest unigrams from empty string:
                        if (VERBOSE)
                        {
                            Console.WriteLine("      skip unigram priors only");
                        }
                        continue;
                    }

                    // Build up "context" ngram:
                    b = new StringBuilder();
                    for (int j = tokens.Length - i - 1; j < tokens.Length - 1; j++)
                    {
                        b.append(' ');
                        b.append(tokens[j]);
                    }
                    string context = b.toString();
                    if (context.Length > 0)
                    {
                        context = context.Substring(1);
                    }
                    if (VERBOSE)
                    {
                        Console.WriteLine("      context='" + context + "'");
                    }
                    long contextCount;
                    if (context.Length == 0)
                    {
                        contextCount = totTokens;
                    }
                    else
                    {
                        //int? count = gramCounts.get(i - 1).get(context);
                        var gramCount = gramCounts[i - 1];
                        int? count = gramCount.ContainsKey(context) ? gramCount[context] : null;
                        if (count == null)
                        {
                            // We never saw this context:
                            backoff *= FreeTextSuggester.ALPHA;
                            if (VERBOSE)
                            {
                                Console.WriteLine("      skip: never saw context");
                            }
                            continue;
                        }
                        contextCount = count.GetValueOrDefault();
                    }
                    if (VERBOSE)
                    {
                        Console.WriteLine("      contextCount=" + contextCount);
                    }
                    IDictionary<string, int?> model = gramCounts[i];

                    // First pass, gather all predictions for this model:
                    if (VERBOSE)
                    {
                        Console.WriteLine("      find terms w/ prefix=" + tokens[tokens.Length - 1]);
                    }
                    List<Lookup.LookupResult> tmp = new List<Lookup.LookupResult>();
                    foreach (string term in terms)
                    {
                        if (term.StartsWith(tokens[tokens.Length - 1], StringComparison.Ordinal))
                        {
                            if (VERBOSE)
                            {
                                Console.WriteLine("        term=" + term);
                            }
                            if (seen.contains(term))
                            {
                                if (VERBOSE)
                                {
                                    Console.WriteLine("          skip seen");
                                }
                                continue;
                            }
                            string ngram = (context + " " + term).Trim();
                            //Integer count = model.get(ngram);
                            int? count = model.ContainsKey(ngram) ? model[ngram] : null;
                            if (count != null)
                            {
                                // LUCENENET NOTE: We need to calculate this as decimal because when using double it can sometimes
                                // return numbers that are greater than long.MaxValue, which results in a negative long number.
                                // This is also the way it is being done in the FreeTextSuggester to work around the issue.
                                Lookup.LookupResult lr = new Lookup.LookupResult(ngram, (long)(long.MaxValue * ((decimal)backoff * (decimal)count / contextCount)));
                                tmp.Add(lr);
                                if (VERBOSE)
                                {
                                    Console.WriteLine("      add tmp key='" + lr.Key + "' score=" + lr.Value);
                                }
                            }
                        }
                    }

                    // Second pass, trim to only top N, and fold those
                    // into overall suggestions:
                    tmp.Sort(byScoreThenKey);
                    if (tmp.size() > num)
                    {
                        //tmp.subList(num, tmp.size()).clear();
                        tmp.RemoveRange(num, tmp.size() - num);
                    }
                    foreach (Lookup.LookupResult result in tmp)
                    {
                        string key = result.Key.toString();
                        int    idx = key.LastIndexOf(' ');
                        string lastToken;
                        if (idx != -1)
                        {
                            lastToken = key.Substring(idx + 1);
                        }
                        else
                        {
                            lastToken = key;
                        }
                        if (!seen.contains(lastToken))
                        {
                            seen.add(lastToken);
                            expected.Add(result);
                            if (VERBOSE)
                            {
                                Console.WriteLine("      keep key='" + result.Key + "' score=" + result.Value);
                            }
                        }
                    }

                    backoff *= FreeTextSuggester.ALPHA;
                }

                expected.Sort(byScoreThenKey);

                if (expected.size() > num)
                {
                    expected.RemoveRange(num, expected.size() - num);
                }

                // Actual:
                IList<Lookup.LookupResult> actual = sug.DoLookup(query, num);

                if (VERBOSE)
                {
                    Console.WriteLine("  expected: " + expected);
                    Console.WriteLine("    actual: " + actual);
                }

                assertEquals(expected.ToString(), actual.ToString());
            }
        }
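        // Illustrative sketch (added): the expected-model loop above mimics the
        // "stupid backoff" scoring used by FreeTextSuggester: start from the longest
        // ngram context and multiply the weight by ALPHA each time the model backs
        // off to a shorter context. The helper below restates that one formula.
        private static double StupidBackoffScore(long ngramCount, long contextCount, double backoff)
        {
            // backoff starts at 1.0 and is multiplied by FreeTextSuggester.ALPHA per backoff level
            return backoff * (double)ngramCount / contextCount;
        }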
 public virtual void TestLetterAsciiHuge()
 {
     Random random = Random();
     int maxLength = 8192; // CharTokenizer.IO_BUFFER_SIZE*2
     MockAnalyzer left = new MockAnalyzer(random, jvmLetter, false);
     left.MaxTokenLength = 255; // match CharTokenizer's max token length
     Analyzer right = new AnalyzerAnonymousInnerClassHelper2(this);
     int numIterations = AtLeast(50);
     for (int i = 0; i < numIterations; i++)
     {
         string s = TestUtil.RandomSimpleString(random, maxLength);
          assertEquals(s, left.TokenStream("foo", NewStringReader(s)), right.TokenStream("foo", NewStringReader(s)));
     }
 }
Example #43
        public virtual void TestCustomEncoder()
        {
            Directory dir = NewDirectory();
            MockAnalyzer analyzer = new MockAnalyzer(Random());

            IndexWriterConfig config = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
            config.SetSimilarity(new CustomNormEncodingSimilarity(this));
            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, config);
            Document doc = new Document();
            Field foo = NewTextField("foo", "", Field.Store.NO);
            Field bar = NewTextField("bar", "", Field.Store.NO);
            doc.Add(foo);
            doc.Add(bar);

            for (int i = 0; i < 100; i++)
            {
                bar.StringValue = "singleton";
                writer.AddDocument(doc);
            }

            IndexReader reader = writer.Reader;
            writer.Dispose();

            NumericDocValues fooNorms = MultiDocValues.GetNormValues(reader, "foo");
            for (int i = 0; i < reader.MaxDoc(); i++)
            {
                Assert.AreEqual(0, fooNorms.Get(i));
            }

            NumericDocValues barNorms = MultiDocValues.GetNormValues(reader, "bar");
            for (int i = 0; i < reader.MaxDoc(); i++)
            {
                Assert.AreEqual(1, barNorms.Get(i));
            }

            reader.Dispose();
            dir.Dispose();
        }
Example #44
 public virtual void TestLetterHtmlishHuge()
 {
     Random random = Random();
     int maxLength = 1024; // this is number of elements, not chars!
     MockAnalyzer left = new MockAnalyzer(random, jvmLetter, false);
     left.MaxTokenLength = 255; // match CharTokenizer's max token length
     Analyzer right = new AnalyzerAnonymousInnerClassHelper4(this);
     int numIterations = AtLeast(50);
     for (int i = 0; i < numIterations; i++)
     {
         string s = TestUtil.RandomHtmlishString(random, maxLength);
         assertEquals(s, left.TokenStream("foo", NewStringReader(s)), right.TokenStream("foo", NewStringReader(s)));
     }
 }
        public void TestRanges()
        {
            Directory dir = NewDirectory();
            // use simpleanalyzer for more natural tokenization (else "test." is a token)
            Analyzer          analyzer = new MockAnalyzer(Random, MockTokenizer.SIMPLE, true);
            IndexWriterConfig iwc      = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);

            iwc.SetMergePolicy(NewLogMergePolicy());
            RandomIndexWriter iw = new RandomIndexWriter(Random, dir, iwc);

            FieldType offsetsType = new FieldType(TextField.TYPE_STORED);

            offsetsType.IndexOptions = (IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
            Field    body = new Field("body", "", offsetsType);
            Document doc  = new Document();

            doc.Add(body);

            body.SetStringValue("This is a test.");
            iw.AddDocument(doc);
            body.SetStringValue("Test a one sentence document.");
            iw.AddDocument(doc);

            IndexReader ir = iw.GetReader();

            iw.Dispose();

            IndexSearcher          searcher    = NewSearcher(ir);
            ICUPostingsHighlighter highlighter = new PostingsHighlighterAnalyzerHelper(analyzer);
            Query   query   = TermRangeQuery.NewStringRange("body", "ta", "tf", true, true);
            TopDocs topDocs = searcher.Search(query, null, 10, Sort.INDEXORDER);

            assertEquals(2, topDocs.TotalHits);
            String[] snippets = highlighter.Highlight("body", query, searcher, topDocs);
            assertEquals(2, snippets.Length);
            assertEquals("This is a <b>test</b>.", snippets[0]);
            assertEquals("<b>Test</b> a one sentence document.", snippets[1]);

            // null start
            query   = TermRangeQuery.NewStringRange("body", null, "tf", true, true);
            topDocs = searcher.Search(query, null, 10, Sort.INDEXORDER);
            assertEquals(2, topDocs.TotalHits);
            snippets = highlighter.Highlight("body", query, searcher, topDocs);
            assertEquals(2, snippets.Length);
            assertEquals("This <b>is</b> <b>a</b> <b>test</b>.", snippets[0]);
            assertEquals("<b>Test</b> <b>a</b> <b>one</b> <b>sentence</b> <b>document</b>.", snippets[1]);

            // null end
            query   = TermRangeQuery.NewStringRange("body", "ta", null, true, true);
            topDocs = searcher.Search(query, null, 10, Sort.INDEXORDER);
            assertEquals(2, topDocs.TotalHits);
            snippets = highlighter.Highlight("body", query, searcher, topDocs);
            assertEquals(2, snippets.Length);
            assertEquals("<b>This</b> is a <b>test</b>.", snippets[0]);
            assertEquals("<b>Test</b> a one sentence document.", snippets[1]);

            // exact start inclusive
            query   = TermRangeQuery.NewStringRange("body", "test", "tf", true, true);
            topDocs = searcher.Search(query, null, 10, Sort.INDEXORDER);
            assertEquals(2, topDocs.TotalHits);
            snippets = highlighter.Highlight("body", query, searcher, topDocs);
            assertEquals(2, snippets.Length);
            assertEquals("This is a <b>test</b>.", snippets[0]);
            assertEquals("<b>Test</b> a one sentence document.", snippets[1]);

            // exact end inclusive
            query   = TermRangeQuery.NewStringRange("body", "ta", "test", true, true);
            topDocs = searcher.Search(query, null, 10, Sort.INDEXORDER);
            assertEquals(2, topDocs.TotalHits);
            snippets = highlighter.Highlight("body", query, searcher, topDocs);
            assertEquals(2, snippets.Length);
            assertEquals("This is a <b>test</b>.", snippets[0]);
            assertEquals("<b>Test</b> a one sentence document.", snippets[1]);

            // exact start exclusive
            BooleanQuery bq = new BooleanQuery();

            bq.Add(new MatchAllDocsQuery(), Occur.SHOULD);
            bq.Add(TermRangeQuery.NewStringRange("body", "test", "tf", false, true), Occur.SHOULD);
            topDocs = searcher.Search(bq, null, 10, Sort.INDEXORDER);
            assertEquals(2, topDocs.TotalHits);
            snippets = highlighter.Highlight("body", bq, searcher, topDocs);
            assertEquals(2, snippets.Length);
            assertEquals("This is a test.", snippets[0]);
            assertEquals("Test a one sentence document.", snippets[1]);

            // exact end exclusive
            bq = new BooleanQuery();
            bq.Add(new MatchAllDocsQuery(), Occur.SHOULD);
            bq.Add(TermRangeQuery.NewStringRange("body", "ta", "test", true, false), Occur.SHOULD);
            topDocs = searcher.Search(bq, null, 10, Sort.INDEXORDER);
            assertEquals(2, topDocs.TotalHits);
            snippets = highlighter.Highlight("body", bq, searcher, topDocs);
            assertEquals(2, snippets.Length);
            assertEquals("This is a test.", snippets[0]);
            assertEquals("Test a one sentence document.", snippets[1]);

            // wrong field
            bq = new BooleanQuery();
            bq.Add(new MatchAllDocsQuery(), Occur.SHOULD);
            bq.Add(TermRangeQuery.NewStringRange("bogus", "ta", "tf", true, true), Occur.SHOULD);
            topDocs = searcher.Search(bq, null, 10, Sort.INDEXORDER);
            assertEquals(2, topDocs.TotalHits);
            snippets = highlighter.Highlight("body", bq, searcher, topDocs);
            assertEquals(2, snippets.Length);
            assertEquals("This is a test.", snippets[0]);
            assertEquals("Test a one sentence document.", snippets[1]);

            ir.Dispose();
            dir.Dispose();
        }
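        // Illustrative sketch (added): the postings highlighter used above requires
        // offsets to be recorded in the postings, which is what the FieldType at the
        // top of the test configures. The factory method name is ours.
        private static FieldType OffsetsFieldType()
        {
            FieldType ft = new FieldType(TextField.TYPE_STORED);
            ft.IndexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS; // needed for postings highlighting
            return ft;
        }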
Example #46
        public void TestBasics()
        {
            Directory dir = NewDirectory();
            MockAnalyzer analyzer = new MockAnalyzer(Random);
            RandomIndexWriter w = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, dir, analyzer);
            var docs = new string[]
            {
                @"this is the end of the world right", @"is this it or maybe not",
                @"this is the end of the universe as we know it",
                @"there is the famous restaurant at the end of the universe"
            };

            for (int i = 0; i < docs.Length; i++)
            {
                Document doc = new Document();
                doc.Add(NewStringField(@"id", @"" + i, Field.Store.YES));
                doc.Add(NewTextField(@"field", docs[i], Field.Store.NO));
                w.AddDocument(doc);
            }

            IndexReader r = w.GetReader();
            IndexSearcher s = NewSearcher(r);
            {
                CommonTermsQuery query = new CommonTermsQuery(Occur.SHOULD, Occur.SHOULD, Random.NextBoolean() ? 2F : 0.5F);
                query.Add(new Term("field", "is"));
                query.Add(new Term("field", "this"));
                query.Add(new Term("field", "end"));
                query.Add(new Term("field", "world"));
                query.Add(new Term("field", "universe"));
                query.Add(new Term("field", "right"));
                TopDocs search = s.Search(query, 10);
                assertEquals(search.TotalHits, 3);
                assertEquals(@"0", r.Document(search.ScoreDocs[0].Doc).Get(@"id"));
                assertEquals(@"2", r.Document(search.ScoreDocs[1].Doc).Get(@"id"));
                assertEquals(@"3", r.Document(search.ScoreDocs[2].Doc).Get(@"id"));
            }

            {
                CommonTermsQuery query = new CommonTermsQuery(Occur.SHOULD, Occur.SHOULD, Random.NextBoolean() ? 2F : 0.5F);
                query.Add(new Term("field", "is"));
                query.Add(new Term("field", "this"));
                query.Add(new Term("field", "end"));
                TopDocs search = s.Search(query, 10);
                assertEquals(search.TotalHits, 2);
                assertEquals(@"0", r.Document(search.ScoreDocs[0].Doc).Get(@"id"));
                assertEquals(@"2", r.Document(search.ScoreDocs[1].Doc).Get(@"id"));
            }

            {
                CommonTermsQuery query = new CommonTermsQuery(Occur.SHOULD, Occur.MUST, Random.NextBoolean() ? 2F : 0.5F);
                query.Add(new Term("field", "is"));
                query.Add(new Term("field", "this"));
                query.Add(new Term("field", "end"));
                query.Add(new Term("field", "world"));
                TopDocs search = s.Search(query, 10);
                assertEquals(search.TotalHits, 1);
                assertEquals(@"0", r.Document(search.ScoreDocs[0].Doc).Get(@"id"));
            }

            {
                CommonTermsQuery query = new CommonTermsQuery(Occur.SHOULD, Occur.MUST, Random.NextBoolean() ? 2F : 0.5F);
                query.Add(new Term("field", "restaurant"));
                query.Add(new Term("field", "universe"));
                TopDocs search = s.Search(query, 10);
                assertEquals(search.TotalHits, 1);
                assertEquals(@"3", r.Document(search.ScoreDocs[0].Doc).Get(@"id"));
            }

            r.Dispose();
            w.Dispose();
            dir.Dispose();
        }
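        // Illustrative sketch (added): CommonTermsQuery as built above, factored into
        // a helper (the name is ours). Terms whose document frequency exceeds the
        // cutoff are grouped under the first Occur; the remaining low-frequency terms
        // fall under the second, which is why Occur.MUST as the second argument
        // tightens matching in the later blocks of the test.
        private static CommonTermsQuery BuildCommonTermsQuery(Occur highFreqOccur, Occur lowFreqOccur, float maxTermFrequency, params Term[] terms)
        {
            CommonTermsQuery query = new CommonTermsQuery(highFreqOccur, lowFreqOccur, maxTermFrequency);
            foreach (Term term in terms)
            {
                query.Add(term);
            }
            return query;
        }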
Example #47
        public virtual void TestBasic()
        {
            Directory   dir      = NewDirectory();
            Analyzer    analyzer = new MockAnalyzer(Random);
            IndexWriter writer   = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetMaxBufferedDocs(2).SetSimilarity(new SimpleSimilarity()).SetMergePolicy(NewLogMergePolicy(2)));

            StringBuilder sb   = new StringBuilder(265);
            string        term = "term";

            for (int i = 0; i < 30; i++)
            {
                Document doc = new Document();
                sb.Append(term).Append(" ");
                string content = sb.ToString();
                Field  noTf    = NewField("noTf", content + (i % 2 == 0 ? "" : " notf"), omitType);
                doc.Add(noTf);

                Field tf = NewField("tf", content + (i % 2 == 0 ? " tf" : ""), normalType);
                doc.Add(tf);

                writer.AddDocument(doc);
                //System.out.println(d);
            }

            writer.ForceMerge(1);
            // flush
            writer.Dispose();

            /*
             * Verify the index
             */
            IndexReader   reader   = DirectoryReader.Open(dir);
            IndexSearcher searcher = NewSearcher(reader);

            searcher.Similarity = new SimpleSimilarity();

            Term      a  = new Term("noTf", term);
            Term      b  = new Term("tf", term);
            Term      c  = new Term("noTf", "notf");
            Term      d  = new Term("tf", "tf");
            TermQuery q1 = new TermQuery(a);
            TermQuery q2 = new TermQuery(b);
            TermQuery q3 = new TermQuery(c);
            TermQuery q4 = new TermQuery(d);

            PhraseQuery pq = new PhraseQuery();

            pq.Add(a);
            pq.Add(c);
            try
            {
                searcher.Search(pq, 10);
                Assert.Fail("did not hit expected exception");
            }
            catch (Exception e)
            {
                Exception cause = e;
                // If the searcher uses an executor service, the IAE is wrapped into other exceptions
                while (cause.InnerException != null)
                {
                    cause = cause.InnerException;
                }
                if (!(cause is InvalidOperationException))
                {
                    throw new InvalidOperationException("Expected an IAE", e);
                } // else OK because positions are not indexed
            }

            searcher.Search(q1, new CountingHitCollectorAnonymousClass(this));
            //System.out.println(CountingHitCollector.getCount());

            searcher.Search(q2, new CountingHitCollectorAnonymousClass2(this));
            //System.out.println(CountingHitCollector.getCount());

            searcher.Search(q3, new CountingHitCollectorAnonymousClass3(this));
            //System.out.println(CountingHitCollector.getCount());

            searcher.Search(q4, new CountingHitCollectorAnonymousClass4(this));
            //System.out.println(CountingHitCollector.getCount());

            BooleanQuery bq = new BooleanQuery();

            bq.Add(q1, Occur.MUST);
            bq.Add(q4, Occur.MUST);

            searcher.Search(bq, new CountingHitCollectorAnonymousClass5(this));
            Assert.AreEqual(15, CountingHitCollector.Count);

            reader.Dispose();
            dir.Dispose();
        }
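        // Illustrative sketch (added): "omitType" and "normalType" are defined outside
        // this excerpt. Plausibly the "noTf" field is indexed with something like the
        // following, which drops term frequencies and positions and explains why the
        // PhraseQuery above throws. This is an assumption, not the original definition.
        private static FieldType MakeOmitTfType()
        {
            FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
            ft.IndexOptions = IndexOptions.DOCS_ONLY; // no term frequencies, no positions
            return ft;
        }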
Example #48
        // Collections.synchronizedMap(new WeakHashMap<SegmentCoreReaders, bool?>());

        public virtual void RunTest(string testName)
        {
            Failed.Set(false);
            AddCount.Set(0);
            DelCount.Set(0);
            PackCount.Set(0);

            DateTime t0 = DateTime.UtcNow;

            Random        random  = new Random(Random().Next());
            LineFileDocs  docs    = new LineFileDocs(random, DefaultCodecSupportsDocValues());
            DirectoryInfo tempDir = CreateTempDir(testName);

            Dir = GetDirectory(NewMockFSDirectory(tempDir)); // some subclasses rely on this being MDW
            if (Dir is BaseDirectoryWrapper)
            {
                ((BaseDirectoryWrapper)Dir).CheckIndexOnClose = false; // don't double-checkIndex, we do it ourselves.
            }
            MockAnalyzer analyzer = new MockAnalyzer(Random());

            analyzer.MaxTokenLength = TestUtil.NextInt(Random(), 1, IndexWriter.MAX_TERM_LENGTH);
            IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetInfoStream(new FailOnNonBulkMergesInfoStream());

            if (LuceneTestCase.TEST_NIGHTLY)
            {
                // newIWConfig makes smallish max seg size, which
                // results in tons and tons of segments for this test
                // when run nightly:
                MergePolicy mp = conf.MergePolicy;
                if (mp is TieredMergePolicy)
                {
                    ((TieredMergePolicy)mp).MaxMergedSegmentMB = 5000.0;
                }
                else if (mp is LogByteSizeMergePolicy)
                {
                    ((LogByteSizeMergePolicy)mp).MaxMergeMB = 1000.0;
                }
                else if (mp is LogMergePolicy)
                {
                    ((LogMergePolicy)mp).MaxMergeDocs = 100000;
                }
            }

            conf.SetMergedSegmentWarmer(new IndexReaderWarmerAnonymousInnerClassHelper(this));

            if (VERBOSE)
            {
                conf.InfoStream = new PrintStreamInfoStreamAnonymousInnerClassHelper(this, Console.Out);
            }
            Writer = new IndexWriter(Dir, conf);
            TestUtil.ReduceOpenFiles(Writer);

            //TaskScheduler es = Random().NextBoolean() ? null : Executors.newCachedThreadPool(new NamedThreadFactory(testName));
            TaskScheduler es = null;

            DoAfterWriter(es);

            int NUM_INDEX_THREADS = TestUtil.NextInt(Random(), 2, 4);

            int RUN_TIME_SEC = LuceneTestCase.TEST_NIGHTLY ? 300 : RANDOM_MULTIPLIER;

            ISet<string>   delIDs     = new ConcurrentHashSet<string>(new HashSet<string>());
            ISet<string>   delPackIDs = new ConcurrentHashSet<string>(new HashSet<string>());
            IList<SubDocs> allSubDocs = new SynchronizedCollection<SubDocs>();

            DateTime stopTime = DateTime.UtcNow.AddSeconds(RUN_TIME_SEC);

            ThreadClass[] indexThreads = LaunchIndexingThreads(docs, NUM_INDEX_THREADS, stopTime, delIDs, delPackIDs, allSubDocs);

            if (VERBOSE)
            {
                Console.WriteLine("TEST: DONE start " + NUM_INDEX_THREADS + " indexing threads [" + (DateTime.UtcNow - t0).TotalMilliseconds + " ms]");
            }

            // Let index build up a bit
            Thread.Sleep(100);

            DoSearching(es, stopTime);

            if (VERBOSE)
            {
                Console.WriteLine("TEST: all searching done [" + (DateTime.UtcNow - t0).TotalMilliseconds + " ms]");
            }

            for (int thread = 0; thread < indexThreads.Length; thread++)
            {
                indexThreads[thread].Join();
            }

            if (VERBOSE)
            {
                Console.WriteLine("TEST: done join indexing threads [" + (DateTime.UtcNow - t0).TotalMilliseconds + " ms]; addCount=" + AddCount + " delCount=" + DelCount);
            }

            IndexSearcher s = FinalSearcher;

            if (VERBOSE)
            {
                Console.WriteLine("TEST: finalSearcher=" + s);
            }

            Assert.IsFalse(Failed.Get());

            bool doFail = false;

            // Verify: make sure delIDs are in fact deleted:
            foreach (string id in delIDs)
            {
                TopDocs hits = s.Search(new TermQuery(new Term("docid", id)), 1);
                if (hits.TotalHits != 0)
                {
                    Console.WriteLine("doc id=" + id + " is supposed to be deleted, but got " + hits.TotalHits + " hits; first docID=" + hits.ScoreDocs[0].Doc);
                    doFail = true;
                }
            }

            // Verify: make sure delPackIDs are in fact deleted:
            foreach (string id in delPackIDs)
            {
                TopDocs hits = s.Search(new TermQuery(new Term("packID", id)), 1);
                if (hits.TotalHits != 0)
                {
                    Console.WriteLine("packID=" + id + " is supposed to be deleted, but got " + hits.TotalHits + " matches");
                    doFail = true;
                }
            }

            // Verify: make sure each group of sub-docs are still in docID order:
            foreach (SubDocs subDocs in allSubDocs.ToList())
            {
                TopDocs hits = s.Search(new TermQuery(new Term("packID", subDocs.PackID)), 20);
                if (!subDocs.Deleted)
                {
                    // We sort by relevance but the scores should be identical so sort falls back to by docID:
                    if (hits.TotalHits != subDocs.SubIDs.Count)
                    {
                        Console.WriteLine("packID=" + subDocs.PackID + ": expected " + subDocs.SubIDs.Count + " hits but got " + hits.TotalHits);
                        doFail = true;
                    }
                    else
                    {
                        int lastDocID  = -1;
                        int startDocID = -1;
                        foreach (ScoreDoc scoreDoc in hits.ScoreDocs)
                        {
                            int docID = scoreDoc.Doc;
                            if (lastDocID != -1)
                            {
                                Assert.AreEqual(1 + lastDocID, docID);
                            }
                            else
                            {
                                startDocID = docID;
                            }
                            lastDocID = docID;
                            Document doc = s.Doc(docID);
                            Assert.AreEqual(subDocs.PackID, doc.Get("packID"));
                        }

                        lastDocID = startDocID - 1;
                        foreach (string subID in subDocs.SubIDs)
                        {
                            hits = s.Search(new TermQuery(new Term("docid", subID)), 1);
                            Assert.AreEqual(1, hits.TotalHits);
                            int docID = hits.ScoreDocs[0].Doc;
                            if (lastDocID != -1)
                            {
                                Assert.AreEqual(1 + lastDocID, docID);
                            }
                            lastDocID = docID;
                        }
                    }
                }
                else
                {
                    // Pack was deleted -- make sure its docs are
                    // deleted.  We can't verify packID is deleted
                    // because we can re-use packID for update:
                    foreach (string subID in subDocs.SubIDs)
                    {
                        Assert.AreEqual(0, s.Search(new TermQuery(new Term("docid", subID)), 1).TotalHits);
                    }
                }
            }

            // Verify: make sure all not-deleted docs are in fact
            // not deleted:
            int endID = Convert.ToInt32(docs.NextDoc().Get("docid"));

            docs.Dispose();

            for (int id = 0; id < endID; id++)
            {
                string stringID = "" + id;
                if (!delIDs.Contains(stringID))
                {
                    TopDocs hits = s.Search(new TermQuery(new Term("docid", stringID)), 1);
                    if (hits.TotalHits != 1)
                    {
                        Console.WriteLine("doc id=" + stringID + " is not supposed to be deleted, but got hitCount=" + hits.TotalHits + "; delIDs=" + delIDs);
                        doFail = true;
                    }
                }
            }
            Assert.IsFalse(doFail);

            Assert.AreEqual(AddCount.Get() - DelCount.Get(), s.IndexReader.NumDocs, "index=" + Writer.SegString() + " addCount=" + AddCount + " delCount=" + DelCount);
            ReleaseSearcher(s);

            Writer.Commit();

            Assert.AreEqual(AddCount.Get() - DelCount.Get(), Writer.NumDocs(), "index=" + Writer.SegString() + " addCount=" + AddCount + " delCount=" + DelCount);

            DoClose();
            Writer.Dispose(false);

            // Cannot shutdown until after writer is closed because
            // writer has merged segment warmer that uses IS to run
            // searches, and that IS may be using this es!

            /*if (es != null)
             * {
             * es.shutdown();
             * es.awaitTermination(1, TimeUnit.SECONDS);
             * }*/

            TestUtil.CheckIndex(Dir);
            Dir.Dispose();
            System.IO.Directory.Delete(tempDir.FullName, true);

            if (VERBOSE)
            {
                Console.WriteLine("TEST: done [" + (DateTime.UtcNow - t0).TotalMilliseconds + " ms]");
            }
        }
Example #49
        public virtual void Test()
        {
            Directory    dir      = NewDirectory();
            MockAnalyzer analyzer = new MockAnalyzer(Random);

            analyzer.MaxTokenLength = TestUtil.NextInt32(Random, 1, IndexWriter.MAX_TERM_LENGTH);
            RandomIndexWriter w = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, dir, analyzer);
            LineFileDocs docs         = new LineFileDocs(Random, DefaultCodecSupportsDocValues);
            int          charsToIndex = AtLeast(100000);
            int          charsIndexed = 0;

            //System.out.println("bytesToIndex=" + charsToIndex);
            while (charsIndexed < charsToIndex)
            {
                Document doc = docs.NextDoc();
                charsIndexed += doc.Get("body").Length;
                w.AddDocument(doc);
                //System.out.println("  bytes=" + charsIndexed + " add: " + doc);
            }
            IndexReader r = w.GetReader();

            //System.out.println("numDocs=" + r.NumDocs);
            w.Dispose();

            IndexSearcher s         = NewSearcher(r);
            Terms         terms     = MultiFields.GetFields(r).GetTerms("body");
            int           termCount = 0;
            TermsEnum     termsEnum = terms.GetIterator(null);

            while (termsEnum.Next() != null)
            {
                termCount++;
            }
            Assert.IsTrue(termCount > 0);

            // Target ~10 terms to search:
            double chance = 10.0 / termCount;

            termsEnum = terms.GetIterator(termsEnum);
            IDictionary<BytesRef, TopDocs> answers = new Dictionary<BytesRef, TopDocs>();

            while (termsEnum.Next() != null)
            {
                if (Random.NextDouble() <= chance)
                {
                    BytesRef term = BytesRef.DeepCopyOf(termsEnum.Term);
                    answers[term] = s.Search(new TermQuery(new Term("body", term)), 100);
                }
            }

            if (answers.Count > 0)
            {
                CountdownEvent startingGun = new CountdownEvent(1);
                int            numThreads  = TestUtil.NextInt32(Random, 2, 5);
                ThreadJob[]    threads     = new ThreadJob[numThreads];
                for (int threadID = 0; threadID < numThreads; threadID++)
                {
                    ThreadJob thread = new ThreadAnonymousInnerClassHelper(this, s, answers, startingGun);
                    threads[threadID] = thread;
                    thread.Start();
                }
                startingGun.Signal();
                foreach (ThreadJob thread in threads)
                {
                    thread.Join();
                }
            }
            r.Dispose();
            dir.Dispose();
        }
Example #50
        public virtual void TestRandomPhrases()
        {
            Directory dir      = NewDirectory();
            Analyzer  analyzer = new MockAnalyzer(Random);

            RandomIndexWriter       w    = new RandomIndexWriter(Random, dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetMergePolicy(NewLogMergePolicy()));
            IList<IList<string>> docs = new List<IList<string>>();

            Documents.Document d = new Documents.Document();
            Field f = NewTextField("f", "", Field.Store.NO);

            d.Add(f);

            Random r = Random;

            int NUM_DOCS = AtLeast(10);

            for (int i = 0; i < NUM_DOCS; i++)
            {
                // must be > 4096 so it spans multiple chunks
                int termCount = TestUtil.NextInt32(Random, 4097, 8200);

                IList<string> doc = new List<string>();

                StringBuilder sb = new StringBuilder();
                while (doc.Count < termCount)
                {
                    if (r.Next(5) == 1 || docs.Count == 0)
                    {
                        // make new non-empty-string term
                        string term;
                        while (true)
                        {
                            term = TestUtil.RandomUnicodeString(r);
                            if (term.Length > 0)
                            {
                                break;
                            }
                        }
                        Exception   priorException = null; // LUCENENET: No need to cast to IOException
                        TokenStream ts             = analyzer.GetTokenStream("ignore", new StringReader(term));
                        try
                        {
                            ICharTermAttribute termAttr = ts.AddAttribute<ICharTermAttribute>();
                            ts.Reset();
                            while (ts.IncrementToken())
                            {
                                string text = termAttr.ToString();
                                doc.Add(text);
                                sb.Append(text).Append(' ');
                            }
                            ts.End();
                        }
                        catch (Exception e) when(e.IsIOException())
                        {
                            priorException = e;
                        }
                        finally
                        {
                            IOUtils.DisposeWhileHandlingException(priorException, ts);
                        }
                    }
                    else
                    {
                        // pick existing sub-phrase
                        IList<string> lastDoc = docs[r.Next(docs.Count)];
                        int            len     = TestUtil.NextInt32(r, 1, 10);
                        int            start   = r.Next(lastDoc.Count - len);
                        for (int k = start; k < start + len; k++)
                        {
                            string t = lastDoc[k];
                            doc.Add(t);
                            sb.Append(t).Append(' ');
                        }
                    }
                }
                docs.Add(doc);
                f.SetStringValue(sb.ToString());
                w.AddDocument(d);
            }

            IndexReader   reader = w.GetReader();
            IndexSearcher s      = NewSearcher(reader);

            w.Dispose();

            // now search
            int num = AtLeast(10);

            for (int i = 0; i < num; i++)
            {
                int            docID = r.Next(docs.Count);
                IList<string> doc = docs[docID];

                int           numTerm = TestUtil.NextInt32(r, 2, 20);
                int           start   = r.Next(doc.Count - numTerm);
                PhraseQuery   pq      = new PhraseQuery();
                StringBuilder sb      = new StringBuilder();
                for (int t = start; t < start + numTerm; t++)
                {
                    pq.Add(new Term("f", doc[t]));
                    sb.Append(doc[t]).Append(' ');
                }

                TopDocs hits  = s.Search(pq, NUM_DOCS);
                bool    found = false;
                for (int j = 0; j < hits.ScoreDocs.Length; j++)
                {
                    if (hits.ScoreDocs[j].Doc == docID)
                    {
                        found = true;
                        break;
                    }
                }

                Assert.IsTrue(found, "phrase '" + sb + "' not found; start=" + start);
            }

            reader.Dispose();
            dir.Dispose();
        }
Example #51
        protected internal virtual void RunFlushByRam(int numThreads, double maxRamMB, bool ensureNotStalled)
        {
            int                    numDocumentsToIndex = 10 + AtLeast(30);
            AtomicInt32            numDocs             = new AtomicInt32(numDocumentsToIndex);
            Directory              dir         = NewDirectory();
            MockDefaultFlushPolicy flushPolicy = new MockDefaultFlushPolicy();
            MockAnalyzer           analyzer    = new MockAnalyzer(Random);

            analyzer.MaxTokenLength = TestUtil.NextInt32(Random, 1, IndexWriter.MAX_TERM_LENGTH);

            IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetFlushPolicy(flushPolicy);
            int numDWPT           = 1 + AtLeast(2);
            DocumentsWriterPerThreadPool threadPool = new ThreadAffinityDocumentsWriterThreadPool(numDWPT);

            iwc.SetIndexerThreadPool(threadPool);
            iwc.SetRAMBufferSizeMB(maxRamMB);
            iwc.SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);
            iwc.SetMaxBufferedDeleteTerms(IndexWriterConfig.DISABLE_AUTO_FLUSH);
            IndexWriter writer = new IndexWriter(dir, iwc);

            flushPolicy = (MockDefaultFlushPolicy)writer.Config.FlushPolicy;
            Assert.IsFalse(flushPolicy.FlushOnDocCount);
            Assert.IsFalse(flushPolicy.FlushOnDeleteTerms);
            Assert.IsTrue(flushPolicy.FlushOnRAM);
            DocumentsWriter docsWriter = writer.DocsWriter;

            Assert.IsNotNull(docsWriter);
            DocumentsWriterFlushControl flushControl = docsWriter.flushControl;

            Assert.AreEqual(0, flushControl.FlushBytes, " bytes must be 0 after init");

            IndexThread[] threads = new IndexThread[numThreads];
            for (int x = 0; x < threads.Length; x++)
            {
                threads[x] = new IndexThread(this, numDocs, numThreads, writer, LineDocFile, false);
                threads[x].Start();
            }

            for (int x = 0; x < threads.Length; x++)
            {
                threads[x].Join();
            }
            long maxRAMBytes = (long)(iwc.RAMBufferSizeMB * 1024.0 * 1024.0);

            Assert.AreEqual(0, flushControl.FlushBytes, " all flushes must be due numThreads=" + numThreads);
            Assert.AreEqual(numDocumentsToIndex, writer.NumDocs);
            Assert.AreEqual(numDocumentsToIndex, writer.MaxDoc);
            Assert.IsTrue(flushPolicy.PeakBytesWithoutFlush <= maxRAMBytes, "peak bytes without flush exceeded watermark");
            AssertActiveBytesAfter(flushControl);
            if (flushPolicy.HasMarkedPending)
            {
                Assert.IsTrue(maxRAMBytes < flushControl.peakActiveBytes);
            }
            if (ensureNotStalled)
            {
                Assert.IsFalse(docsWriter.flushControl.stallControl.WasStalled);
            }
            writer.Dispose();
            Assert.AreEqual(0, flushControl.ActiveBytes);
            dir.Dispose();
        }
Ejemplo n.º 52
0
        // [Test] // LUCENENET NOTE: For now, we are overriding this test in every subclass to pull it into the right context for the subclass
        public virtual void TestSortedBytes()
        {
            Analyzer analyzer = new MockAnalyzer(Random());

            Directory directory = NewDirectory();
            IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
            RandomIndexWriter iwriter = new RandomIndexWriter(Random(), directory, conf);
            Document doc = new Document();
            string longTerm = "longtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongterm";
            string text = "this is the text to be indexed. " + longTerm;
            doc.Add(NewTextField("fieldname", text, Field.Store.YES));
            doc.Add(new SortedDocValuesField("dv", new BytesRef("hello world")));
            iwriter.AddDocument(doc);
            iwriter.Dispose();

            // Now search the index:
            IndexReader ireader = DirectoryReader.Open(directory); // read-only=true
            IndexSearcher isearcher = new IndexSearcher(ireader);

            Assert.AreEqual(1, isearcher.Search(new TermQuery(new Term("fieldname", longTerm)), 1).TotalHits);
            Query query = new TermQuery(new Term("fieldname", "text"));
            TopDocs hits = isearcher.Search(query, null, 1);
            Assert.AreEqual(1, hits.TotalHits);
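            // Reuse a single BytesRef across ord lookups to avoid per-hit allocations.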
            BytesRef scratch = new BytesRef();
            // Iterate through the results:
            for (int i = 0; i < hits.ScoreDocs.Length; i++)
            {
                Document hitDoc = isearcher.Doc(hits.ScoreDocs[i].Doc);
                Assert.AreEqual(text, hitDoc.Get("fieldname"));
                Debug.Assert(ireader.Leaves.Count == 1);
                SortedDocValues dv = ((AtomicReader)ireader.Leaves[0].Reader).GetSortedDocValues("dv");
                dv.LookupOrd(dv.GetOrd(hits.ScoreDocs[i].Doc), scratch);
                Assert.AreEqual(new BytesRef("hello world"), scratch);
            }

            ireader.Dispose();
            directory.Dispose();
        }
Ejemplo n.º 53
0
        public virtual void TestLetterUnicode()
        {
            Random random = Random();
            Analyzer left = new MockAnalyzer(random, jvmLetter, false);
            Analyzer right = new AnalyzerAnonymousInnerClassHelper5(this);
            for (int i = 0; i < 1000; i++)
            {
                // assertEquals/newStringReader are helpers defined on the enclosing test class
                string s = TestUtil.RandomUnicodeString(random);
                assertEquals(s, left.GetTokenStream("foo", newStringReader(s)), right.GetTokenStream("foo", newStringReader(s)));
            }
        }
Ejemplo n.º 54
0
        // [Test] // LUCENENET NOTE: For now, we are overriding this test in every subclass to pull it into the right context for the subclass
        public virtual void TestSortedMergeAwayAllValues()
        {
            Directory directory = NewDirectory();
            Analyzer analyzer = new MockAnalyzer(Random());
            IndexWriterConfig iwconfig = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
            iwconfig.SetMergePolicy(NewLogMergePolicy());
            RandomIndexWriter iwriter = new RandomIndexWriter(Random(), directory, iwconfig);

            Document doc = new Document();
            doc.Add(new StringField("id", "0", Field.Store.NO));
            iwriter.AddDocument(doc);
            doc = new Document();
            doc.Add(new StringField("id", "1", Field.Store.NO));
            doc.Add(new SortedDocValuesField("field", new BytesRef("hello")));
            iwriter.AddDocument(doc);
            iwriter.Commit();
            iwriter.DeleteDocuments(new Term("id", "1"));
            iwriter.ForceMerge(1);

            DirectoryReader ireader = iwriter.Reader;
            iwriter.Dispose();

            SortedDocValues dv = GetOnlySegmentReader(ireader).GetSortedDocValues("field");
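            // All values were merged away: codecs that support docs-with-field report
            // the missing value as ord -1; older codecs fall back to ord 0, the empty term.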
            if (DefaultCodecSupportsDocsWithField())
            {
                Assert.AreEqual(-1, dv.GetOrd(0));
                Assert.AreEqual(0, dv.ValueCount);
            }
            else
            {
                Assert.AreEqual(0, dv.GetOrd(0));
                Assert.AreEqual(1, dv.ValueCount);
                BytesRef @ref = new BytesRef();
                dv.LookupOrd(0, @ref);
                Assert.AreEqual(new BytesRef(), @ref);
            }

            ireader.Dispose();
            directory.Dispose();
        }
Ejemplo n.º 55
0
        public void TestRandomIndex()
        {
            Directory dir = NewDirectory();
            MockAnalyzer analyzer = new MockAnalyzer(Random);
            analyzer.MaxTokenLength = TestUtil.NextInt32(Random, 1, IndexWriter.MAX_TERM_LENGTH);
            RandomIndexWriter w = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, dir, analyzer);
            CreateRandomIndex(AtLeast(50), w, Random.NextInt64());
            DirectoryReader reader = w.GetReader();
            AtomicReader wrapper = SlowCompositeReaderWrapper.Wrap(reader);
            string field = @"body";
            Terms terms = wrapper.GetTerms(field);
            var lowFreqQueue = new AnonymousPriorityQueue(this, 5);
            Util.PriorityQueue<TermAndFreq> highFreqQueue = new AnonymousPriorityQueue1(this, 5);
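            // Maintain the five highest- and five lowest-document-frequency terms
            // seen while walking the "body" field's terms enum.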
            try
            {
                TermsEnum iterator = terms.GetIterator(null);
                while (iterator.Next() != null)
                {
                    if (highFreqQueue.Count < 5)
                    {
                        highFreqQueue.Add(new TermAndFreq(BytesRef.DeepCopyOf(iterator.Term), iterator.DocFreq));
                        lowFreqQueue.Add(new TermAndFreq(BytesRef.DeepCopyOf(iterator.Term), iterator.DocFreq));
                    }
                    else
                    {
                        if (highFreqQueue.Top.freq < iterator.DocFreq)
                        {
                            highFreqQueue.Top.freq = iterator.DocFreq;
                            highFreqQueue.Top.term = BytesRef.DeepCopyOf(iterator.Term);
                            highFreqQueue.UpdateTop();
                        }

                        if (lowFreqQueue.Top.freq > iterator.DocFreq)
                        {
                            lowFreqQueue.Top.freq = iterator.DocFreq;
                            lowFreqQueue.Top.term = BytesRef.DeepCopyOf(iterator.Term);
                            lowFreqQueue.UpdateTop();
                        }
                    }
                }

                int lowFreq = lowFreqQueue.Top.freq;
                int highFreq = highFreqQueue.Top.freq;
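                // If the random content didn't open a gap between high and low
                // frequencies, the cutoff below would be meaningless; skip instead.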
                AssumeTrue(@"unlucky index", highFreq - 1 > lowFreq);
                List<TermAndFreq> highTerms = QueueToList(highFreqQueue);
                List<TermAndFreq> lowTerms = QueueToList(lowFreqQueue);
                IndexSearcher searcher = NewSearcher(reader);
                Occur lowFreqOccur = RandomOccur(Random);
                BooleanQuery verifyQuery = new BooleanQuery();
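                // verifyQuery contains only the low-frequency terms; with the cutoff
                // set to highFreq - 1, CommonTermsQuery treats the high-frequency terms
                // as "common" and should match exactly the same documents.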
                CommonTermsQuery cq = new CommonTermsQuery(RandomOccur(Random), lowFreqOccur, highFreq - 1, Random.NextBoolean());
                foreach (TermAndFreq termAndFreq in lowTerms)
                {
                    cq.Add(new Term(field, termAndFreq.term));
                    verifyQuery.Add(new BooleanClause(new TermQuery(new Term(field, termAndFreq.term)), lowFreqOccur));
                }

                foreach (TermAndFreq termAndFreq in highTerms)
                {
                    cq.Add(new Term(field, termAndFreq.term));
                }

                TopDocs cqSearch = searcher.Search(cq, reader.MaxDoc);
                TopDocs verifySearch = searcher.Search(verifyQuery, reader.MaxDoc);
                assertEquals(verifySearch.TotalHits, cqSearch.TotalHits);
                var hits = new JCG.HashSet<int>();
                foreach (ScoreDoc doc in verifySearch.ScoreDocs)
                {
                    hits.Add(doc.Doc);
                }

                foreach (ScoreDoc doc in cqSearch.ScoreDocs)
                {
                    assertTrue(hits.Remove(doc.Doc));
                }

                assertTrue(hits.Count == 0);
                w.ForceMerge(1);
                DirectoryReader reader2 = w.GetReader();
                QueryUtils.Check(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                    this,
#endif
                    Random, cq, NewSearcher(reader2));
                reader2.Dispose();
            }
            finally
            {
                reader.Dispose();
                wrapper.Dispose();
                w.Dispose();
                dir.Dispose();
            }
        }
Ejemplo n.º 56
0
        public virtual void TestLetterUnicodeHuge()
        {
            Random random = Random();
            int maxLength = 4300; // CharTokenizer.IO_BUFFER_SIZE + fudge
            MockAnalyzer left = new MockAnalyzer(random, jvmLetter, false);
            left.MaxTokenLength = 255; // match CharTokenizer's max token length
            Analyzer right = new AnalyzerAnonymousInnerClassHelper6(this);
            int numIterations = AtLeast(50);
            for (int i = 0; i < numIterations; i++)
            {
                string s = TestUtil.RandomUnicodeString(random, maxLength);
                assertEquals(s, left.GetTokenStream("foo", newStringReader(s)), right.GetTokenStream("foo", newStringReader(s)));
            }
        }
Ejemplo n.º 57
0
        public virtual void Test()
        {
            MockDirectoryWrapper dir = NewMockFSDirectory(CreateTempDir("TestIndexWriterOutOfFileDescriptors"));

            dir.PreventDoubleWrite = false;
            double rate = Random.NextDouble() * 0.01;
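            // A failure rate below 1% lets most operations succeed while still
            // exercising the exception paths on randomly chosen file opens.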

            //System.out.println("rate=" + rate);
            dir.RandomIOExceptionRateOnOpen = rate;
            int                  iters       = AtLeast(20);
            LineFileDocs         docs        = new LineFileDocs(Random, DefaultCodecSupportsDocValues);
            IndexReader          r           = null;
            DirectoryReader      r2          = null;
            bool                 any         = false;
            MockDirectoryWrapper dirCopy     = null;
            int                  lastNumDocs = 0;

            for (int iter = 0; iter < iters; iter++)
            {
                IndexWriter w = null;
                if (VERBOSE)
                {
                    Console.WriteLine("TEST: iter=" + iter);
                }
                try
                {
                    MockAnalyzer analyzer = new MockAnalyzer(Random);
                    analyzer.MaxTokenLength = TestUtil.NextInt32(Random, 1, IndexWriter.MAX_TERM_LENGTH);
                    IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);

                    if (VERBOSE)
                    {
                        // Do this ourselves instead of relying on LTC so
                        // we see incrementing messageID:
                        iwc.SetInfoStream(new TextWriterInfoStream(Console.Out));
                    }
                    var ms = iwc.MergeScheduler;
                    if (ms is IConcurrentMergeScheduler)
                    {
                        ((IConcurrentMergeScheduler)ms).SetSuppressExceptions();
                    }
                    w = new IndexWriter(dir, iwc);
                    if (r != null && Random.Next(5) == 3)
                    {
                        if (Random.NextBoolean())
                        {
                            if (VERBOSE)
                            {
                                Console.WriteLine("TEST: addIndexes IR[]");
                            }
                            w.AddIndexes(new IndexReader[] { r });
                        }
                        else
                        {
                            if (VERBOSE)
                            {
                                Console.WriteLine("TEST: addIndexes Directory[]");
                            }
                            w.AddIndexes(new Directory[] { dirCopy });
                        }
                    }
                    else
                    {
                        if (VERBOSE)
                        {
                            Console.WriteLine("TEST: addDocument");
                        }
                        w.AddDocument(docs.NextDoc());
                    }
                    dir.RandomIOExceptionRateOnOpen = 0.0;
                    w.Dispose();
                    w = null;

                    // NOTE: this is O(N^2)!  Only enable for temporary debugging:
                    //dir.setRandomIOExceptionRateOnOpen(0.0);
                    //TestUtil.CheckIndex(dir);
                    //dir.setRandomIOExceptionRateOnOpen(rate);

                    // Verify numDocs only increases, to catch IndexWriter
                    // accidentally deleting the index:
                    dir.RandomIOExceptionRateOnOpen = 0.0;
                    Assert.IsTrue(DirectoryReader.IndexExists(dir));
                    if (r2 == null)
                    {
                        r2 = DirectoryReader.Open(dir);
                    }
                    else
                    {
                        DirectoryReader r3 = DirectoryReader.OpenIfChanged(r2);
                        if (r3 != null)
                        {
                            r2.Dispose();
                            r2 = r3;
                        }
                    }
                    Assert.IsTrue(r2.NumDocs >= lastNumDocs, "before=" + lastNumDocs + " after=" + r2.NumDocs);
                    lastNumDocs = r2.NumDocs;
                    //System.out.println("numDocs=" + lastNumDocs);
                    dir.RandomIOExceptionRateOnOpen = rate;

                    any = true;
                    if (VERBOSE)
                    {
                        Console.WriteLine("TEST: iter=" + iter + ": success");
                    }
                }
                catch (IOException ioe)
                {
                    if (VERBOSE)
                    {
                        Console.WriteLine("TEST: iter=" + iter + ": exception");
                        Console.WriteLine(ioe.ToString());
                        Console.Write(ioe.StackTrace);
                    }
                    if (w != null)
                    {
                        // NOTE: leave random IO exceptions enabled here,
                        // to verify that rollback does not try to write
                        // anything:
                        w.Rollback();
                    }
                }

                if (any && r == null && Random.NextBoolean())
                {
                    // Make a copy of a non-empty index so we can use
                    // it to addIndexes later:
                    dir.RandomIOExceptionRateOnOpen = 0.0;
                    r       = DirectoryReader.Open(dir);
                    dirCopy = NewMockFSDirectory(CreateTempDir("TestIndexWriterOutOfFileDescriptors.copy"));
                    HashSet<string> files = new HashSet<string>();
                    foreach (string file in dir.ListAll())
                    {
                        dir.Copy(dirCopy, file, file, IOContext.DEFAULT);
                        files.Add(file);
                    }
                    dirCopy.Sync(files);
                    // Have IW kiss the dir so we remove any leftover
                    // files ... we can easily have leftover files at
                    // the time we take a copy because we are holding
                    // open a reader:
                    (new IndexWriter(dirCopy, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)))).Dispose();
                    dirCopy.RandomIOExceptionRate   = rate;
                    dir.RandomIOExceptionRateOnOpen = rate;
                }
            }

            if (r2 != null)
            {
                r2.Dispose();
            }
            if (r != null)
            {
                r.Dispose();
                dirCopy.Dispose();
            }
            dir.Dispose();
        }
Ejemplo n.º 58
0
        public virtual void TestBooleanSpanQuery()
        {
            int hits = 0;
            Directory directory = NewDirectory();
            Analyzer indexerAnalyzer = new MockAnalyzer(Random());

            IndexWriterConfig config = new IndexWriterConfig(TEST_VERSION_CURRENT, indexerAnalyzer);
            IndexWriter writer = new IndexWriter(directory, config);
            string FIELD = "content";
            Document d = new Document();
            d.Add(new TextField(FIELD, "clockwork orange", Field.Store.YES));
            writer.AddDocument(d);
            writer.Dispose();

            IndexReader indexReader = DirectoryReader.Open(directory);
            IndexSearcher searcher = NewSearcher(indexReader);

            DisjunctionMaxQuery query = new DisjunctionMaxQuery(1.0f);
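            // sq2 is deliberately misspelled ("clckwork"); the disjunction should
            // still match the single indexed document via sq1.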
            SpanQuery sq1 = new SpanTermQuery(new Term(FIELD, "clockwork"));
            SpanQuery sq2 = new SpanTermQuery(new Term(FIELD, "clckwork"));
            query.Add(sq1);
            query.Add(sq2);
            TopScoreDocCollector collector = TopScoreDocCollector.Create(1000, true);
            searcher.Search(query, collector);
            hits = collector.TopDocs().ScoreDocs.Length;
            foreach (ScoreDoc scoreDoc in collector.TopDocs().ScoreDocs)
            {
                Console.WriteLine(scoreDoc.Doc);
            }
            indexReader.Dispose();
            Assert.AreEqual(1, hits);
            directory.Dispose();
        }
Ejemplo n.º 59
0
        public virtual void TestPositions()
        {
            Directory   ram      = NewDirectory();
            Analyzer    analyzer = new MockAnalyzer(Random);
            IndexWriter writer   = new IndexWriter(ram, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
            Document    d        = new Document();

            // f1,f2,f3: docs only
            FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
            ft.IndexOptions = IndexOptions.DOCS_ONLY;
            Field f1 = NewField("f1", "this field has docs only", ft);
            d.Add(f1);
            Field f2 = NewField("f2", "this field has docs only", ft);
            d.Add(f2);
            Field f3 = NewField("f3", "this field has docs only", ft);
            d.Add(f3);

            // f4,f5,f6: docs and freqs
            FieldType ft2 = new FieldType(TextField.TYPE_NOT_STORED);
            ft2.IndexOptions = IndexOptions.DOCS_AND_FREQS;
            Field f4 = NewField("f4", "this field has docs and freqs", ft2);
            d.Add(f4);
            Field f5 = NewField("f5", "this field has docs and freqs", ft2);
            d.Add(f5);
            Field f6 = NewField("f6", "this field has docs and freqs", ft2);
            d.Add(f6);

            // f7,f8,f9: docs/freqs/positions
            FieldType ft3 = new FieldType(TextField.TYPE_NOT_STORED);
            ft3.IndexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
            Field f7 = NewField("f7", "this field has docs and freqs and positions", ft3);
            d.Add(f7);
            Field f8 = NewField("f8", "this field has docs and freqs and positions", ft3);
            d.Add(f8);
            Field f9 = NewField("f9", "this field has docs and freqs and positions", ft3);
            d.Add(f9);

            writer.AddDocument(d);
            writer.ForceMerge(1);

            // now we add another document which has docs-only for f1, f4, f7, docs/freqs for f2, f5, f8,
            // and docs/freqs/positions for f3, f6, f9
            d = new Document();

            // f1,f4,f7: docs only
            f1 = NewField("f1", "this field has docs only", ft);
            d.Add(f1);

            f4 = NewField("f4", "this field has docs only", ft);
            d.Add(f4);

            f7 = NewField("f7", "this field has docs only", ft);
            d.Add(f7);

            // f2, f5, f8: docs and freqs
            f2 = NewField("f2", "this field has docs and freqs", ft2);
            d.Add(f2);

            f5 = NewField("f5", "this field has docs and freqs", ft2);
            d.Add(f5);

            f8 = NewField("f8", "this field has docs and freqs", ft2);
            d.Add(f8);

            // f3, f6, f9: docs and freqs and positions
            f3 = NewField("f3", "this field has docs and freqs and positions", ft3);
            d.Add(f3);

            f6 = NewField("f6", "this field has docs and freqs and positions", ft3);
            d.Add(f6);

            f9 = NewField("f9", "this field has docs and freqs and positions", ft3);
            d.Add(f9);

            writer.AddDocument(d);

            // force merge
            writer.ForceMerge(1);
            // flush
            writer.Dispose();

            SegmentReader reader = GetOnlySegmentReader(DirectoryReader.Open(ram));
            FieldInfos    fi     = reader.FieldInfos;

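            // Merging heterogeneous IndexOptions for the same field always
            // downgrades to the weakest option seen in any segment: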
            // docs + docs = docs
            Assert.AreEqual(IndexOptions.DOCS_ONLY, fi.FieldInfo("f1").IndexOptions);
            // docs + docs/freqs = docs
            Assert.AreEqual(IndexOptions.DOCS_ONLY, fi.FieldInfo("f2").IndexOptions);
            // docs + docs/freqs/pos = docs
            Assert.AreEqual(IndexOptions.DOCS_ONLY, fi.FieldInfo("f3").IndexOptions);
            // docs/freqs + docs = docs
            Assert.AreEqual(IndexOptions.DOCS_ONLY, fi.FieldInfo("f4").IndexOptions);
            // docs/freqs + docs/freqs = docs/freqs
            Assert.AreEqual(IndexOptions.DOCS_AND_FREQS, fi.FieldInfo("f5").IndexOptions);
            // docs/freqs + docs/freqs/pos = docs/freqs
            Assert.AreEqual(IndexOptions.DOCS_AND_FREQS, fi.FieldInfo("f6").IndexOptions);
            // docs/freqs/pos + docs = docs
            Assert.AreEqual(IndexOptions.DOCS_ONLY, fi.FieldInfo("f7").IndexOptions);
            // docs/freqs/pos + docs/freqs = docs/freqs
            Assert.AreEqual(IndexOptions.DOCS_AND_FREQS, fi.FieldInfo("f8").IndexOptions);
            // docs/freqs/pos + docs/freqs/pos = docs/freqs/pos
            Assert.AreEqual(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS, fi.FieldInfo("f9").IndexOptions);

            reader.Dispose();
            ram.Dispose();
        }
Ejemplo n.º 60
0
        public virtual void TestNRTAndCommit()
        {
            Directory dir = NewDirectory();
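            // NRTCachingDirectory(delegate, maxMergeSizeMB, maxCachedMB): newly written
            // files are cached in RAM while the expected segment size stays under 2 MB
            // and total cached bytes stay under 25 MB; otherwise they go straight to "dir".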
            NRTCachingDirectory cachedDir = new NRTCachingDirectory(dir, 2.0, 25.0);
            MockAnalyzer analyzer = new MockAnalyzer(Random());
            analyzer.MaxTokenLength = TestUtil.NextInt(Random(), 1, IndexWriter.MAX_TERM_LENGTH);
            IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
            RandomIndexWriter w = new RandomIndexWriter(Random(), cachedDir, conf);
            LineFileDocs docs = new LineFileDocs(Random(), DefaultCodecSupportsDocValues());
            int numDocs = TestUtil.NextInt(Random(), 100, 400);

            if (VERBOSE)
            {
                Console.WriteLine("TEST: numDocs=" + numDocs);
            }

            IList<BytesRef> ids = new List<BytesRef>();
            DirectoryReader r = null;
            for (int docCount = 0; docCount < numDocs; docCount++)
            {
                Document doc = docs.NextDoc();
                ids.Add(new BytesRef(doc.Get("docid")));
                w.AddDocument(doc);
                if (Random().Next(20) == 17)
                {
                    if (r == null)
                    {
                        r = DirectoryReader.Open(w.w, false);
                    }
                    else
                    {
                        DirectoryReader r2 = DirectoryReader.OpenIfChanged(r);
                        if (r2 != null)
                        {
                            r.Dispose();
                            r = r2;
                        }
                    }
                    Assert.AreEqual(1 + docCount, r.NumDocs);
                    IndexSearcher s = NewSearcher(r);
                    // Just make sure search can run; we can't assert
                    // totHits since it could be 0
                    TopDocs hits = s.Search(new TermQuery(new Term("body", "the")), 10);
                    // System.out.println("tot hits " + hits.totalHits);
                }
            }

            if (r != null)
            {
                r.Dispose();
            }

            // Close should force cache to clear since all files are sync'd
            w.Dispose();

            string[] cachedFiles = cachedDir.ListCachedFiles();
            foreach (string file in cachedFiles)
            {
                Console.WriteLine("FAIL: cached file " + file + " remains after sync");
            }
            Assert.AreEqual(0, cachedFiles.Length);

            r = DirectoryReader.Open(dir);
            foreach (BytesRef id in ids)
            {
                Assert.AreEqual(1, r.DocFreq(new Term("docid", id)));
            }
            r.Dispose();
            cachedDir.Dispose();
            docs.Dispose();
        }