/// <summary>
/// Verifies that <c>CachingWrapperFilter</c> stays consistent with deletions
/// across reader reopens: a reopen with no deletion changes must be a cache
/// hit (miss count unchanged), and real deletes must never produce stale
/// hits — the cached entry is either invalidated or has the deletes applied
/// dynamically.
/// </summary>
public virtual void TestEnforceDeletions()
        {
            Directory         dir    = NewDirectory();
            // SerialMergeScheduler + LogMergePolicy(10): the miss-count
            // asserts below require that no unexpected merges happen.
            RandomIndexWriter writer = new RandomIndexWriter(Random, dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetMergeScheduler(new SerialMergeScheduler()).SetMergePolicy(NewLogMergePolicy(10)));

            // NOTE: cannot use writer.getReader because RIW (on
            // flipping a coin) may give us a newly opened reader,
            // but we use .reopen on this reader below and expect to
            // (must) get an NRT reader:
            DirectoryReader reader = DirectoryReader.Open(writer.IndexWriter, true);
            // same reason we don't wrap?
            IndexSearcher searcher = NewSearcher(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                reader, false);

            // add a doc, refresh the reader, and check that it's there
            Document doc = new Document();

            doc.Add(NewStringField("id", "1", Field.Store.YES));
            writer.AddDocument(doc);

            reader   = RefreshReader(reader);
            searcher = NewSearcher(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                reader, false);

            TopDocs docs = searcher.Search(new MatchAllDocsQuery(), 1);
            Assert.AreEqual(1, docs.TotalHits, "Should find a hit...");

            Filter startFilter = new QueryWrapperFilter(new TermQuery(new Term("id", "1")));

            CachingWrapperFilter filter = new CachingWrapperFilter(startFilter);

            docs = searcher.Search(new MatchAllDocsQuery(), filter, 1);
            // after the first search the filter must have cached something:
            Assert.IsTrue(filter.GetSizeInBytes() > 0);

            Assert.AreEqual(1, docs.TotalHits, "[query + filter] Should find a hit...");

            Query constantScore = new ConstantScoreQuery(filter);
            docs = searcher.Search(constantScore, 1);
            Assert.AreEqual(1, docs.TotalHits, "[just filter] Should find a hit...");

            // make sure we get a cache hit when we reopen reader
            // that had no change to deletions

            // fake delete (deletes nothing):
            writer.DeleteDocuments(new Term("foo", "bar"));

            IndexReader oldReader = reader;
            reader = RefreshReader(reader);
            // nothing changed, so the refresh must hand back the same reader:
            Assert.IsTrue(reader == oldReader);
            int missCount = filter.missCount;
            docs = searcher.Search(constantScore, 1);
            Assert.AreEqual(1, docs.TotalHits, "[just filter] Should find a hit...");

            // cache hit:
            Assert.AreEqual(missCount, filter.missCount);

            // now delete the doc, refresh the reader, and see that it's not there
            writer.DeleteDocuments(new Term("id", "1"));

            // NOTE: important to hold ref here so GC doesn't clear
            // the cache entry!  Else the assert below may sometimes
            // fail:
            oldReader = reader;
            reader    = RefreshReader(reader);

            searcher = NewSearcher(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                reader, false);

            missCount = filter.missCount;
            docs      = searcher.Search(new MatchAllDocsQuery(), filter, 1);
            Assert.AreEqual(0, docs.TotalHits, "[query + filter] Should *not* find a hit...");

            // cache hit: the delete was applied without recomputing the filter
            Assert.AreEqual(missCount, filter.missCount);
            docs = searcher.Search(constantScore, 1);
            Assert.AreEqual(0, docs.TotalHits, "[just filter] Should *not* find a hit...");

            // apply deletes dynamically:
            filter = new CachingWrapperFilter(startFilter);
            writer.AddDocument(doc);
            reader   = RefreshReader(reader);
            searcher = NewSearcher(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                reader, false);

            docs = searcher.Search(new MatchAllDocsQuery(), filter, 1);
            Assert.AreEqual(1, docs.TotalHits, "[query + filter] Should find a hit...");
            // fresh filter, so the first search must have been a miss:
            missCount = filter.missCount;
            Assert.IsTrue(missCount > 0);
            constantScore = new ConstantScoreQuery(filter);
            docs          = searcher.Search(constantScore, 1);
            Assert.AreEqual(1, docs.TotalHits, "[just filter] Should find a hit...");
            Assert.AreEqual(missCount, filter.missCount);

            writer.AddDocument(doc);

            // NOTE: important to hold ref here so GC doesn't clear
            // the cache entry!  Else the assert below may sometimes
            // fail:
            oldReader = reader;

            reader   = RefreshReader(reader);
            searcher = NewSearcher(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                reader, false);

            docs = searcher.Search(new MatchAllDocsQuery(), filter, 1);
            Assert.AreEqual(2, docs.TotalHits, "[query + filter] Should find 2 hits...");
            // new segment -> the filter must recompute (miss count grows):
            Assert.IsTrue(filter.missCount > missCount);
            missCount = filter.missCount;

            constantScore = new ConstantScoreQuery(filter);
            docs          = searcher.Search(constantScore, 1);
            Assert.AreEqual(2, docs.TotalHits, "[just filter] Should find a hit...");
            Assert.AreEqual(missCount, filter.missCount);

            // now delete the doc, refresh the reader, and see that it's not there
            writer.DeleteDocuments(new Term("id", "1"));

            reader   = RefreshReader(reader);
            searcher = NewSearcher(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                reader, false);

            docs = searcher.Search(new MatchAllDocsQuery(), filter, 1);
            Assert.AreEqual(0, docs.TotalHits, "[query + filter] Should *not* find a hit...");
            // CWF reused the same entry (it dynamically applied the deletes):
            Assert.AreEqual(missCount, filter.missCount);

            docs = searcher.Search(constantScore, 1);
            Assert.AreEqual(0, docs.TotalHits, "[just filter] Should *not* find a hit...");
            // CWF reused the same entry (it dynamically applied the deletes):
            Assert.AreEqual(missCount, filter.missCount);

            // NOTE: silliness to make sure JRE does not eliminate
            // our holding onto oldReader to prevent
            // CachingWrapperFilter's WeakHashMap from dropping the
            // entry:
            Assert.IsTrue(oldReader != null);

            reader.Dispose();
            writer.Dispose();
            dir.Dispose();
        }
        public virtual void TestInfiniteValues()
        {
            Directory         dir    = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));
            Document          doc    = new Document();

            doc.Add(new DoubleField("double", double.NegativeInfinity, Field.Store.NO));
            doc.Add(new Int64Field("long", long.MinValue, Field.Store.NO));
            writer.AddDocument(doc);

            doc = new Document();
            doc.Add(new DoubleField("double", double.PositiveInfinity, Field.Store.NO));
            doc.Add(new Int64Field("long", long.MaxValue, Field.Store.NO));
            writer.AddDocument(doc);

            doc = new Document();
            doc.Add(new DoubleField("double", 0.0, Field.Store.NO));
            doc.Add(new Int64Field("long", 0L, Field.Store.NO));
            writer.AddDocument(doc);

            foreach (double d in TestNumericUtils.DOUBLE_NANs)
            {
                doc = new Document();
                doc.Add(new DoubleField("double", d, Field.Store.NO));
                writer.AddDocument(doc);
            }

            writer.Dispose();

            IndexReader   r = DirectoryReader.Open(dir);
            IndexSearcher s = NewSearcher(r);

            Query   q       = NumericRangeQuery.NewInt64Range("long", null, null, true, true);
            TopDocs topDocs = s.Search(q, 10);

            Assert.AreEqual(3, topDocs.ScoreDocs.Length, "Score doc count");

            q       = NumericRangeQuery.NewInt64Range("long", null, null, false, false);
            topDocs = s.Search(q, 10);
            Assert.AreEqual(3, topDocs.ScoreDocs.Length, "Score doc count");

            q       = NumericRangeQuery.NewInt64Range("long", long.MinValue, long.MaxValue, true, true);
            topDocs = s.Search(q, 10);
            Assert.AreEqual(3, topDocs.ScoreDocs.Length, "Score doc count");

            q       = NumericRangeQuery.NewInt64Range("long", long.MinValue, long.MaxValue, false, false);
            topDocs = s.Search(q, 10);
            Assert.AreEqual(1, topDocs.ScoreDocs.Length, "Score doc count");

            q       = NumericRangeQuery.NewDoubleRange("double", null, null, true, true);
            topDocs = s.Search(q, 10);
            Assert.AreEqual(3, topDocs.ScoreDocs.Length, "Score doc count");

            q       = NumericRangeQuery.NewDoubleRange("double", null, null, false, false);
            topDocs = s.Search(q, 10);
            Assert.AreEqual(3, topDocs.ScoreDocs.Length, "Score doc count");

            q       = NumericRangeQuery.NewDoubleRange("double", double.NegativeInfinity, double.PositiveInfinity, true, true);
            topDocs = s.Search(q, 10);
            Assert.AreEqual(3, topDocs.ScoreDocs.Length, "Score doc count");

            q       = NumericRangeQuery.NewDoubleRange("double", double.NegativeInfinity, double.PositiveInfinity, false, false);
            topDocs = s.Search(q, 10);
            Assert.AreEqual(1, topDocs.ScoreDocs.Length, "Score doc count");

            q       = NumericRangeQuery.NewDoubleRange("double", double.NaN, double.NaN, true, true);
            topDocs = s.Search(q, 10);
            Assert.AreEqual(TestNumericUtils.DOUBLE_NANs.Length, topDocs.ScoreDocs.Length, "Score doc count");

            r.Dispose();
            dir.Dispose();
        }
// ---- 示例#3 (Example #3) ----
        /// <summary>
        /// Builds two parallel random indexes from the same seed: <c>dir1</c>
        /// with the default merge policy and <c>dir2</c> with a
        /// SortingMergePolicy, so tests can compare an unsorted reader
        /// (<c>reader</c>) against its sorted counterpart
        /// (<c>sortedReader</c>) over identical documents, deletes, and
        /// (when supported) numeric doc-values updates.
        /// </summary>
        private void CreateRandomIndexes()
        {
            dir1 = NewDirectory();
            dir2 = NewDirectory();
            int           numDocs     = AtLeast(150);
            int           numTerms    = TestUtil.NextInt32(Random, 1, numDocs / 5);
            ISet<string>  randomTerms = new HashSet<string>();

            while (randomTerms.Count < numTerms)
            {
                randomTerms.Add(TestUtil.RandomSimpleString(Random));
            }
            terms = new List<string>(randomTerms);
            // Both writers share one seed so their analyzers (and RIW coin
            // flips) see identical randomness and produce comparable indexes.
            long seed = Random.NextInt64();
            IndexWriterConfig iwc1 = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(new Random((int)seed)));
            IndexWriterConfig iwc2 = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(new Random((int)seed)));

            iwc2.SetMergePolicy(NewSortingMergePolicy(sort));
            RandomIndexWriter iw1 = new RandomIndexWriter(new Random((int)seed), dir1, iwc1);
            RandomIndexWriter iw2 = new RandomIndexWriter(new Random((int)seed), dir2, iwc2);

            for (int i = 0; i < numDocs; ++i)
            {
                // Occasionally delete by a random term (never on the last doc,
                // so the index is guaranteed non-empty):
                if (Random.Next(5) == 0 && i != numDocs - 1)
                {
                    string term = RandomPicks.RandomFrom(Random, terms);
                    iw1.DeleteDocuments(new Term("s", term));
                    iw2.DeleteDocuments(new Term("s", term));
                }
                Document doc = randomDocument();
                iw1.AddDocument(doc);
                iw2.AddDocument(doc);
                if (Random.Next(8) == 0)
                {
                    iw1.Commit();
                    iw2.Commit();
                }
            }
            // Make sure we have something to merge
            iw1.Commit();
            iw2.Commit();
            Document doc2 = randomDocument();

            // NOTE: don't use RIW.addDocument directly, since it sometimes commits
            // which may trigger a merge, in which case forceMerge may not do anything.
            // With field updates this is a problem, since the updates can go into the
            // single segment in the index, and therefore the index won't be sorted.
            // This hurts the assumption of the test later on, that the index is sorted
            // by SortingMP.
            iw1.IndexWriter.AddDocument(doc2);
            iw2.IndexWriter.AddDocument(doc2);

            if (DefaultCodecSupportsFieldUpdates)
            {
                // update NDV of docs belonging to one term (covers many documents)
                long   value = Random.NextInt64();
                string term  = RandomPicks.RandomFrom(Random, terms);
                iw1.IndexWriter.UpdateNumericDocValue(new Term("s", term), "ndv", value);
                iw2.IndexWriter.UpdateNumericDocValue(new Term("s", term), "ndv", value);
            }

            iw1.ForceMerge(1);
            iw2.ForceMerge(1);
            iw1.Dispose();
            iw2.Dispose();
            reader       = DirectoryReader.Open(dir1);
            sortedReader = DirectoryReader.Open(dir2);
        }
// ---- 示例#4 (Example #4) ----
        /// <summary>
        /// Indexes three docs containing literal backslashes and verifies
        /// wildcard/prefix query parsing and matching: queries that must match
        /// all docs, queries that must match none, and per-doc prefix and
        /// wildcard queries that must each match exactly one doc.
        /// </summary>
        public virtual void TestParsingAndSearching()
        {
            string field = "content";

            // Each doc starts with escaped backslash sequences to exercise escaping.
            string[] docs = new string[] { "\\ abcdefg1", "\\x00079 hijklmn1", "\\\\ opqrstu1" };

            // queries that should find all docs
            Query[] matchAll = new Query[] { new WildcardQuery(new Term(field, "*")), new WildcardQuery(new Term(field, "*1")), new WildcardQuery(new Term(field, "**1")), new WildcardQuery(new Term(field, "*?")), new WildcardQuery(new Term(field, "*?1")), new WildcardQuery(new Term(field, "?*1")), new WildcardQuery(new Term(field, "**")), new WildcardQuery(new Term(field, "***")), new WildcardQuery(new Term(field, "\\\\*")) };

            // queries that should find no docs
            Query[] matchNone = new Query[] { new WildcardQuery(new Term(field, "a*h")), new WildcardQuery(new Term(field, "a?h")), new WildcardQuery(new Term(field, "*a*h")), new WildcardQuery(new Term(field, "?a")), new WildcardQuery(new Term(field, "a?")) };

            // matchOneDocPrefix[i] holds prefix queries that must match only docs[i]
            PrefixQuery[][] matchOneDocPrefix = new PrefixQuery[][] { new PrefixQuery[] { new PrefixQuery(new Term(field, "a")), new PrefixQuery(new Term(field, "ab")), new PrefixQuery(new Term(field, "abc")) }, new PrefixQuery[] { new PrefixQuery(new Term(field, "h")), new PrefixQuery(new Term(field, "hi")), new PrefixQuery(new Term(field, "hij")), new PrefixQuery(new Term(field, "\\x0007")) }, new PrefixQuery[] { new PrefixQuery(new Term(field, "o")), new PrefixQuery(new Term(field, "op")), new PrefixQuery(new Term(field, "opq")), new PrefixQuery(new Term(field, "\\\\")) } };

            // matchOneDocWild[i] holds wildcard queries that must match only docs[i]
            WildcardQuery[][] matchOneDocWild = new WildcardQuery[][] { new WildcardQuery[] { new WildcardQuery(new Term(field, "*a*")), new WildcardQuery(new Term(field, "*ab*")), new WildcardQuery(new Term(field, "*abc**")), new WildcardQuery(new Term(field, "ab*e*")), new WildcardQuery(new Term(field, "*g?")), new WildcardQuery(new Term(field, "*f?1")) }, new WildcardQuery[] { new WildcardQuery(new Term(field, "*h*")), new WildcardQuery(new Term(field, "*hi*")), new WildcardQuery(new Term(field, "*hij**")), new WildcardQuery(new Term(field, "hi*k*")), new WildcardQuery(new Term(field, "*n?")), new WildcardQuery(new Term(field, "*m?1")), new WildcardQuery(new Term(field, "hij**")) }, new WildcardQuery[] { new WildcardQuery(new Term(field, "*o*")), new WildcardQuery(new Term(field, "*op*")), new WildcardQuery(new Term(field, "*opq**")), new WildcardQuery(new Term(field, "op*q*")), new WildcardQuery(new Term(field, "*u?")), new WildcardQuery(new Term(field, "*t?1")), new WildcardQuery(new Term(field, "opq**")) } };

            // prepare the index
            Directory         dir = NewDirectory();
            RandomIndexWriter iw  = new RandomIndexWriter(Random(), dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NewLogMergePolicy()));

            for (int i = 0; i < docs.Length; i++)
            {
                Document doc = new Document();
                doc.Add(NewTextField(field, docs[i], Field.Store.NO));
                iw.AddDocument(doc);
            }
            iw.Dispose();

            IndexReader   reader   = DirectoryReader.Open(dir);
            IndexSearcher searcher = NewSearcher(reader);

            // test queries that must find all
            foreach (Query q in matchAll)
            {
                if (VERBOSE)
                {
                    Console.WriteLine("matchAll: q=" + q + " " + q.GetType().Name);
                }
                ScoreDoc[] hits = searcher.Search(q, null, 1000).ScoreDocs;
                Assert.AreEqual(docs.Length, hits.Length);
            }

            // test queries that must find none
            foreach (Query q in matchNone)
            {
                if (VERBOSE)
                {
                    Console.WriteLine("matchNone: q=" + q + " " + q.GetType().Name);
                }
                ScoreDoc[] hits = searcher.Search(q, null, 1000).ScoreDocs;
                Assert.AreEqual(0, hits.Length);
            }

            // test the prefix queries find only one doc
            for (int i = 0; i < matchOneDocPrefix.Length; i++)
            {
                for (int j = 0; j < matchOneDocPrefix[i].Length; j++)
                {
                    Query q = matchOneDocPrefix[i][j];
                    if (VERBOSE)
                    {
                        Console.WriteLine("match 1 prefix: doc=" + docs[i] + " q=" + q + " " + q.GetType().Name);
                    }
                    ScoreDoc[] hits = searcher.Search(q, null, 1000).ScoreDocs;
                    Assert.AreEqual(1, hits.Length);
                    Assert.AreEqual(i, hits[0].Doc);
                }
            }

            // test the wildcard queries find only one doc
            for (int i = 0; i < matchOneDocWild.Length; i++)
            {
                for (int j = 0; j < matchOneDocWild[i].Length; j++)
                {
                    Query q = matchOneDocWild[i][j];
                    if (VERBOSE)
                    {
                        Console.WriteLine("match 1 wild: doc=" + docs[i] + " q=" + q + " " + q.GetType().Name);
                    }
                    ScoreDoc[] hits = searcher.Search(q, null, 1000).ScoreDocs;
                    Assert.AreEqual(1, hits.Length);
                    Assert.AreEqual(i, hits[0].Doc);
                }
            }

            reader.Dispose();
            dir.Dispose();
        }
// ---- 示例#5 (Example #5) ----
        /// <summary>
        /// Test-class setup: builds a small index (one doc per DocFields
        /// entry) searched with the baseline similarity, then builds a big
        /// index by repeatedly adding the index to itself until it holds at
        /// least 3000 docs, and finally appends NUM_EXTRA_DOCS docs in a
        /// second field.
        /// </summary>
        public override void BeforeClass()
        {
            base.BeforeClass();

            // Small index: one document per DocFields entry.
            Directory = NewDirectory();
            RandomIndexWriter smallWriter = new RandomIndexWriter(Random, Directory, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetMergePolicy(NewLogMergePolicy()));

            foreach (string content in DocFields)
            {
                Document doc = new Document();
                doc.Add(NewTextField(field, content, Field.Store.NO));
                smallWriter.AddDocument(doc);
            }
            smallWriter.Dispose();

            LittleReader = DirectoryReader.Open(Directory);
            Searcher     = NewSearcher(LittleReader);
            // this is intentionally using the baseline sim, because it compares against bigSearcher (which uses a random one)
            Searcher.Similarity = new DefaultSimilarity();

            // Make big index
            Dir2 = new MockDirectoryWrapper(Random, new RAMDirectory(Directory, IOContext.DEFAULT));

            // First multiply small test index, doubling MulFactor each cycle:
            MulFactor = 1;
            int totalDocs = 0;

            if (VERBOSE)
            {
                Console.WriteLine("\nTEST: now copy index...");
            }
            do
            {
                if (VERBOSE)
                {
                    Console.WriteLine("\nTEST: cycle...");
                }
                // Snapshot the current big index and add it onto itself.
                Directory         snapshot    = new MockDirectoryWrapper(Random, new RAMDirectory(Dir2, IOContext.DEFAULT));
                RandomIndexWriter mergeWriter = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                    this,
#endif
                    Random, Dir2);
                mergeWriter.AddIndexes(snapshot);
                totalDocs = mergeWriter.MaxDoc;
                mergeWriter.Dispose();
                MulFactor *= 2;
            } while (totalDocs < 3000);

            // Append NUM_EXTRA_DOCS extra docs, half per field2 value.
            RandomIndexWriter extraWriter = new RandomIndexWriter(Random, Dir2, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetMaxBufferedDocs(TestUtil.NextInt32(Random, 50, 1000)));

            foreach (string text in new[] { "xxx", "big bad bug" })
            {
                Document extraDoc = new Document();
                extraDoc.Add(NewTextField("field2", text, Field.Store.NO));
                for (int n = 0; n < NUM_EXTRA_DOCS / 2; n++)
                {
                    extraWriter.AddDocument(extraDoc);
                }
            }

            Reader      = extraWriter.GetReader();
            BigSearcher = NewSearcher(Reader);
            extraWriter.Dispose();
        }
// ---- 示例#6 (Example #6) ----
        /// <summary>
        /// Build the suggest index, using up to the specified
        ///  amount of temporary RAM while building.  Note that
        ///  the weights for the suggestions are ignored.
        /// </summary>
        /// <param name="iterator">Source of suggestions; payloads and contexts are not supported.</param>
        /// <param name="ramBufferSizeMB">RAM buffer size handed to the temporary IndexWriter.</param>
        /// <exception cref="System.ArgumentException">
        /// If the iterator has payloads/contexts, if no suggestion was provided,
        /// or if a token contains the separator byte.
        /// </exception>
        public virtual void Build(IInputIterator iterator, double ramBufferSizeMB)
        {
            if (iterator.HasPayloads)
            {
                throw new System.ArgumentException("this suggester doesn't support payloads");
            }
            if (iterator.HasContexts)
            {
                throw new System.ArgumentException("this suggester doesn't support contexts");
            }

            string prefix    = this.GetType().Name;
            var    directory = OfflineSorter.DefaultTempDir();
            // TODO: messy ... java7 has Files.createTempDirectory
            // ... but 4.x is java6:
            DirectoryInfo tempIndexPath = null;
            Random        random        = new Random();

            // Pick a unique temp directory name; loop until the created
            // directory is observed on disk.
            while (true)
            {
                tempIndexPath = new DirectoryInfo(Path.Combine(directory.FullName, prefix + ".index." + random.Next(int.MaxValue)));
                tempIndexPath.Create();
                if (System.IO.Directory.Exists(tempIndexPath.FullName))
                {
                    break;
                }
            }

            using (Directory dir = FSDirectory.Open(tempIndexPath))
            {
#pragma warning disable 612, 618
                IndexWriterConfig iwc = new IndexWriterConfig(LuceneVersion.LUCENE_CURRENT, indexAnalyzer);
#pragma warning restore 612, 618
                iwc.SetOpenMode(OpenMode.CREATE);
                iwc.SetRAMBufferSizeMB(ramBufferSizeMB);
                IndexWriter writer = new IndexWriter(dir, iwc);

                var ft = new FieldType(TextField.TYPE_NOT_STORED);
                // TODO: if only we had IndexOptions.TERMS_ONLY...
                ft.IndexOptions = IndexOptions.DOCS_AND_FREQS;
                ft.OmitNorms    = true;
                ft.Freeze();

                // Single reusable doc/field: each suggestion becomes one document.
                Document doc   = new Document();
                Field    field = new Field("body", "", ft);
                doc.Add(field);

                totTokens = 0;
                IndexReader reader = null;

                bool success = false;
                count = 0;
                try
                {
                    // Index every surface form from the iterator.
                    while (true)
                    {
                        BytesRef surfaceForm = iterator.Next();
                        if (surfaceForm == null)
                        {
                            break;
                        }
                        field.SetStringValue(surfaceForm.Utf8ToString());
                        writer.AddDocument(doc);
                        count++;
                    }
                    // NRT reader over the just-written index (no need for norms/deletes).
                    reader = DirectoryReader.Open(writer, false);

                    Terms terms = MultiFields.GetTerms(reader, "body");
                    if (terms == null)
                    {
                        throw new System.ArgumentException("need at least one suggestion");
                    }

                    // Move all ngrams into an FST:
                    TermsEnum termsEnum = terms.GetIterator(null);

                    Outputs <long?> outputs = PositiveInt32Outputs.Singleton;
                    Builder <long?> builder = new Builder <long?>(FST.INPUT_TYPE.BYTE1, outputs);

                    Int32sRef scratchInts = new Int32sRef();
                    while (true)
                    {
                        BytesRef term = termsEnum.Next();
                        if (term == null)
                        {
                            break;
                        }
                        int ngramCount = CountGrams(term);
                        if (ngramCount > grams)
                        {
                            throw new System.ArgumentException("tokens must not contain separator byte; got token=" + term + " but gramCount=" + ngramCount + ", which is greater than expected max ngram size=" + grams);
                        }
                        // Unigram frequencies contribute to the total token count.
                        if (ngramCount == 1)
                        {
                            totTokens += termsEnum.TotalTermFreq;
                        }

                        builder.Add(Lucene.Net.Util.Fst.Util.ToInt32sRef(term, scratchInts), EncodeWeight(termsEnum.TotalTermFreq));
                    }

                    fst = builder.Finish();
                    if (fst == null)
                    {
                        throw new System.ArgumentException("need at least one suggestion");
                    }
                    //System.out.println("FST: " + fst.getNodeCount() + " nodes");

                    /*
                     * PrintWriter pw = new PrintWriter("/x/tmp/out.dot");
                     * Util.toDot(fst, pw, true, true);
                     * pw.close();
                     */

                    success = true;
                }
                finally
                {
                    // Close writer/reader first (suppressing errors only on the
                    // failure path), then always remove the temp index files and
                    // directory.
                    try
                    {
                        if (success)
                        {
                            IOUtils.Close(writer, reader);
                        }
                        else
                        {
                            IOUtils.CloseWhileHandlingException(writer, reader);
                        }
                    }
                    finally
                    {
                        foreach (string file in dir.ListAll())
                        {
                            FileInfo path = new FileInfo(Path.Combine(tempIndexPath.FullName, file));
                            try
                            {
                                path.Delete();
                            }
                            catch (Exception e)
                            {
                                throw new InvalidOperationException("failed to remove " + path, e);
                            }
                        }

                        try
                        {
                            tempIndexPath.Delete();
                        }
                        catch (Exception e)
                        {
                            throw new InvalidOperationException("failed to remove " + tempIndexPath, e);
                        }
                    }
                }
            }
        }
// ---- 示例#7 (Example #7) ----
        /// <summary>
        /// Test using various international locales with accented characters (which
        /// sort differently depending on locale).
        /// </summary>
        /// <param name="usAnalyzer">Analyzer producing US-English collation keys for the "US" field.</param>
        /// <param name="franceAnalyzer">Analyzer producing French collation keys for the "France" field.</param>
        /// <param name="swedenAnalyzer">Analyzer producing Swedish collation keys for the "Sweden" field.</param>
        /// <param name="denmarkAnalyzer">Analyzer producing Danish collation keys for the "Denmark" field.</param>
        /// <param name="usResult">Expected tracer-field order when sorting by "US".</param>
        /// <param name="frResult">Expected tracer-field order when sorting by "France".</param>
        /// <param name="svResult">Expected tracer-field order when sorting by "Sweden".</param>
        /// <param name="dkResult">Expected tracer-field order when sorting by "Denmark".</param>

        // Copied (and slightly modified) from
        // Lucene.Net.Search.TestSort.TestInternationalSort()
        //
        // TODO: this test is really fragile. there are already 3 different cases,
        // depending upon unicode version.
        public virtual void TestCollationKeySort(Analyzer usAnalyzer,
                                                 Analyzer franceAnalyzer,
                                                 Analyzer swedenAnalyzer,
                                                 Analyzer denmarkAnalyzer,
                                                 string usResult,
                                                 string frResult,
                                                 string svResult,
                                                 string dkResult)
        {
            using (Directory indexStore = NewDirectory())
            {
                // NOTE: the writer must be fully disposed before the reader is
                // opened below, hence the nested using blocks.
                using (IndexWriter writer = new IndexWriter(indexStore, new IndexWriterConfig(LuceneTestCase.TEST_VERSION_CURRENT, new MockAnalyzer(Random, MockTokenizer.WHITESPACE, false))))
                {
                    // document data:
                    // the tracer field is used to determine which document was hit
                    string[][] sortData = new string[][] { new string[] { "A", "x", "p\u00EAche", "p\u00EAche", "p\u00EAche", "p\u00EAche" }, new string[] { "B", "y", "HAT", "HAT", "HAT", "HAT" }, new string[] { "C", "x", "p\u00E9ch\u00E9", "p\u00E9ch\u00E9", "p\u00E9ch\u00E9", "p\u00E9ch\u00E9" }, new string[] { "D", "y", "HUT", "HUT", "HUT", "HUT" }, new string[] { "E", "x", "peach", "peach", "peach", "peach" }, new string[] { "F", "y", "H\u00C5T", "H\u00C5T", "H\u00C5T", "H\u00C5T" }, new string[] { "G", "x", "sin", "sin", "sin", "sin" }, new string[] { "H", "y", "H\u00D8T", "H\u00D8T", "H\u00D8T", "H\u00D8T" }, new string[] { "I", "x", "s\u00EDn", "s\u00EDn", "s\u00EDn", "s\u00EDn" }, new string[] { "J", "y", "HOT", "HOT", "HOT", "HOT" } };

                    // tracer field is stored only (not indexed/analyzed)
                    FieldType customType = new FieldType();
                    customType.IsStored = true;

                    for (int i = 0; i < sortData.Length; ++i)
                    {
                        Document doc = new Document();
                        doc.Add(new Field("tracer", sortData[i][0], customType));
                        doc.Add(new TextField("contents", sortData[i][1], Field.Store.NO));
                        if (sortData[i][2] != null)
                        {
                            doc.Add(new TextField("US", usAnalyzer.GetTokenStream("US", new StringReader(sortData[i][2]))));
                        }
                        if (sortData[i][3] != null)
                        {
                            doc.Add(new TextField("France", franceAnalyzer.GetTokenStream("France", new StringReader(sortData[i][3]))));
                        }
                        if (sortData[i][4] != null)
                        {
                            doc.Add(new TextField("Sweden", swedenAnalyzer.GetTokenStream("Sweden", new StringReader(sortData[i][4]))));
                        }
                        if (sortData[i][5] != null)
                        {
                            doc.Add(new TextField("Denmark", denmarkAnalyzer.GetTokenStream("Denmark", new StringReader(sortData[i][5]))));
                        }
                        writer.AddDocument(doc);
                    }
                    writer.ForceMerge(1);
                } // writer.Dispose();
                using (IndexReader reader = DirectoryReader.Open(indexStore))
                {
                    IndexSearcher searcher = new IndexSearcher(reader);

                    Sort         sort   = new Sort();
                    Search.Query queryX = new TermQuery(new Term("contents", "x"));
                    Search.Query queryY = new TermQuery(new Term("contents", "y"));

                    // Sort by each locale field and compare the tracer order
                    // against the locale-specific expected result.
                    sort.SetSort(new SortField("US", SortFieldType.STRING));
                    this.AssertMatches(searcher, queryY, sort, usResult);

                    sort.SetSort(new SortField("France", SortFieldType.STRING));
                    this.AssertMatches(searcher, queryX, sort, frResult);

                    sort.SetSort(new SortField("Sweden", SortFieldType.STRING));
                    this.AssertMatches(searcher, queryY, sort, svResult);

                    sort.SetSort(new SortField("Denmark", SortFieldType.STRING));
                    this.AssertMatches(searcher, queryY, sort, dkResult);
                } // reader.Dispose();
            }     // indexStore.Dispose();
        }
        /// <summary>
        /// Indexes a single document whose content is copied into several differently
        /// analyzed fields, then highlights the stored "field" with the
        /// <see cref="FastVectorHighlighter"/> — either directly or by merging hits
        /// from the alternate ("matched") fields — and asserts the produced fragment.
        /// </summary>
        /// <param name="useMatchedFields">If true, hits from the alternate fields are merged into the highlight.</param>
        /// <param name="fieldMatch">Passed through to <see cref="FieldQuery"/>.</param>
        /// <param name="fieldValue">The text indexed into every variant field.</param>
        /// <param name="expected">The expected best fragment.</param>
        /// <param name="queryClauses">Clauses combined with MUST into the query under test.</param>
        private void matchedFieldsTestCase(bool useMatchedFields, bool fieldMatch, String fieldValue, String expected, params Query[] queryClauses)
        {
            Document doc = new Document();

            // Both field types need full term vectors (positions + offsets) for the FVH.
            FieldType stored = new FieldType(TextField.TYPE_STORED);
            stored.StoreTermVectorOffsets = true;
            stored.StoreTermVectorPositions = true;
            stored.StoreTermVectors = true;
            stored.Freeze();

            FieldType matched = new FieldType(TextField.TYPE_NOT_STORED);
            matched.StoreTermVectorOffsets = true;
            matched.StoreTermVectorPositions = true;
            matched.StoreTermVectors = true;
            matched.Freeze();

            doc.Add(new Field("field", fieldValue, stored));               // Whitespace tokenized with English stop words
            doc.Add(new Field("field_exact", fieldValue, matched));        // Whitespace tokenized without stop words
            doc.Add(new Field("field_super_exact", fieldValue, matched));  // Whitespace tokenized without toLower
            doc.Add(new Field("field_characters", fieldValue, matched));   // Each letter is a token
            doc.Add(new Field("field_tripples", fieldValue, matched));     // Every three letters is a token
            // Sliced at 10 chars then analyzed just like "field".
            // NOTE(review): the upper bound is Length - 1, so the last character is
            // always dropped for short values — confirm against the upstream Java test.
            // (The trailing "- 0" was a dead artifact of the Java substring(start, end)
            // -> C# Substring(start, length) conversion; removing it changes nothing.)
            doc.Add(new Field("field_sliced", fieldValue.Substring(0, Math.Min(fieldValue.Length - 1, 10)), matched));
            doc.Add(new Field("field_der_red", new CannedTokenStream(      // Hacky field containing "der" and "red" at pos = 0
                                  token("der", 1, 0, 3),
                                  token("red", 0, 0, 3)
                                  ), matched));

            Analyzer analyzer = new AnalyzerWrapperAnonymousHelper();

            // using declarations: the original disposed these manually at the end of the
            // method, leaking the reader/writer/directory whenever an assertion failed.
            // Disposal runs in reverse declaration order (reader, writer, dir), matching
            // the original happy-path order.
            using Directory dir = NewDirectory();
            using IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
            writer.AddDocument(doc);

            FastVectorHighlighter highlighter      = new FastVectorHighlighter();
            IFragListBuilder      fragListBuilder  = new SimpleFragListBuilder();
            IFragmentsBuilder     fragmentsBuilder = new ScoreOrderFragmentsBuilder();
            using IndexReader reader = DirectoryReader.Open(writer, true);

            String[] preTags  = new String[] { "<b>" };
            String[] postTags = new String[] { "</b>" };
            IEncoder encoder  = new DefaultEncoder();
            int      docId    = 0;

            // AND all supplied clauses together.
            BooleanQuery query = new BooleanQuery();
            foreach (Query clause in queryClauses)
            {
                query.Add(clause, Occur.MUST);
            }
            FieldQuery fieldQuery = new FieldQuery(query, reader, true, fieldMatch);

            String[] bestFragments;
            if (useMatchedFields)
            {
                ISet<String> matchedFields = new JCG.HashSet<String>
                {
                    "field",
                    "field_exact",
                    "field_super_exact",
                    "field_characters",
                    "field_tripples",
                    "field_sliced",
                    "field_der_red"
                };
                bestFragments = highlighter.GetBestFragments(fieldQuery, reader, docId, "field", matchedFields, 25, 1,
                                                             fragListBuilder, fragmentsBuilder, preTags, postTags, encoder);
            }
            else
            {
                bestFragments = highlighter.GetBestFragments(fieldQuery, reader, docId, "field", 25, 1,
                                                             fragListBuilder, fragmentsBuilder, preTags, postTags, encoder);
            }
            assertEquals(expected, bestFragments[0]);
        }
        /// <summary>
        /// Indexes <c>AtLeast(50)</c> documents carrying a random value in a doc-values
        /// field "dv" of the given <paramref name="type"/> (plus a numeric "id" field),
        /// then, per index segment, verifies that the matching <see cref="ValueSource"/>
        /// (BytesRefFieldSource for BINARY/SORTED, LongFieldSource for NUMERIC) reads
        /// back exactly the values that were written.
        /// </summary>
        /// <param name="type">The doc-values type under test.</param>
        /// <exception cref="InvalidOperationException">If <paramref name="type"/> is not BINARY, SORTED, or NUMERIC.</exception>
        private void DoTest(FieldInfo.DocValuesType_e type)
        {
            Directory         d        = NewDirectory();
            IndexWriterConfig iwConfig = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
            int   nDocs = AtLeast(50);
            Field id    = new NumericDocValuesField("id", 0);
            Field f;

            // Pick the doc-values field implementation that matches the type under test.
            switch (type)
            {
            case FieldInfo.DocValuesType_e.BINARY:
                f = new BinaryDocValuesField("dv", new BytesRef());
                break;

            case FieldInfo.DocValuesType_e.SORTED:
                f = new SortedDocValuesField("dv", new BytesRef());
                break;

            case FieldInfo.DocValuesType_e.NUMERIC:
                f = new NumericDocValuesField("dv", 0);
                break;

            default:
                throw new InvalidOperationException();
            }
            // The same Document/Field instances are reused for every added doc;
            // only their values are mutated inside the loop below.
            Document document = new Document();

            document.Add(id);
            document.Add(f);

            // vals[i] remembers the value written for doc id == i so it can be
            // compared against what the ValueSource reads back per segment.
            object[] vals = new object[nDocs];

            RandomIndexWriter iw = new RandomIndexWriter(Random(), d, iwConfig);

            for (int i = 0; i < nDocs; ++i)
            {
                id.LongValue = i;
                switch (type)
                {
                case FieldInfo.DocValuesType_e.SORTED:
                case FieldInfo.DocValuesType_e.BINARY:
                    // Random simple string; retry until non-empty.
                    do
                    {
                        vals[i] = TestUtil.RandomSimpleString(Random(), 20);
                    } while (((string)vals[i]).Length == 0);
                    f.BytesValue = new BytesRef((string)vals[i]);
                    break;

                case FieldInfo.DocValuesType_e.NUMERIC:
                    int bitsPerValue = Random().NextIntBetween(1, 31);     // keep it an int
                    vals[i]     = (long)Random().Next((int)PackedInts.MaxValue(bitsPerValue));
                    f.LongValue = (long)vals[i];
                    break;
                }
                iw.AddDocument(document);
                // Occasionally commit so the index ends up with multiple segments.
                if (Random().NextBoolean() && i % 10 == 9)
                {
                    iw.Commit();
                }
            }
            iw.Dispose();

            DirectoryReader rd = DirectoryReader.Open(d);

            // Verify each segment independently ("leave" is this port's spelling of "leaf").
            foreach (AtomicReaderContext leave in rd.Leaves)
            {
                // "id" maps a segment-local docid back to the index into vals.
                FunctionValues ids = (new LongFieldSource("id")).GetValues(null, leave);
                ValueSource    vs;
                switch (type)
                {
                case FieldInfo.DocValuesType_e.BINARY:
                case FieldInfo.DocValuesType_e.SORTED:
                    vs = new BytesRefFieldSource("dv");
                    break;

                case FieldInfo.DocValuesType_e.NUMERIC:
                    vs = new LongFieldSource("dv");
                    break;

                default:
                    throw new InvalidOperationException();
                }
                FunctionValues values = vs.GetValues(null, leave);
                BytesRef       bytes  = new BytesRef();
                for (int i = 0; i < leave.AtomicReader.MaxDoc; ++i)
                {
                    // Every doc was given a value, so it must exist with the object
                    // type that matches the value source in use.
                    assertTrue(values.Exists(i));
                    if (vs is BytesRefFieldSource)
                    {
                        assertTrue(values.ObjectVal(i) is string);
                    }
                    else if (vs is LongFieldSource)
                    {
                        assertTrue(values.ObjectVal(i) is long?);
                        assertTrue(values.BytesVal(i, bytes));
                    }
                    else
                    {
                        throw new InvalidOperationException();
                    }

                    object expected = vals[ids.IntVal(i)];
                    switch (type)
                    {
                    case FieldInfo.DocValuesType_e.SORTED:
                        values.OrdVal(i);     // no exception
                        assertTrue(values.NumOrd() >= 1);
                        // SORTED must also satisfy all the BINARY checks below.
                        goto case FieldInfo.DocValuesType_e.BINARY;

                    case FieldInfo.DocValuesType_e.BINARY:
                        assertEquals(expected, values.ObjectVal(i));
                        assertEquals(expected, values.StrVal(i));
                        assertEquals(expected, values.ObjectVal(i));
                        assertEquals(expected, values.StrVal(i));
                        assertTrue(values.BytesVal(i, bytes));
                        assertEquals(new BytesRef((string)expected), bytes);
                        break;

                    case FieldInfo.DocValuesType_e.NUMERIC:
                        assertEquals(Number.ToInt64(expected.ToString()), values.LongVal(i));
                        break;
                    }
                }
            }
            rd.Dispose();
            d.Dispose();
        }
        /// <summary>
        /// Exercises FastVectorHighlighter phrase handling: term-only queries,
        /// sloppy phrases (alone and nested inside boolean queries), a phrase that
        /// must NOT match, and centering behavior around a very long term.
        /// Fragment sizes (18 vs 30) control whether the centered highlight fits.
        /// </summary>
        public void TestPhraseHighlightTest()
        {
            Directory   dir    = NewDirectory();
            IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)));
            Document    doc    = new Document();
            FieldType   type   = new FieldType(TextField.TYPE_STORED);

            // FVH requires full term vectors (positions + offsets).
            type.StoreTermVectorOffsets   = (true);
            type.StoreTermVectorPositions = (true);
            type.StoreTermVectors         = (true);
            type.Freeze();
            Field longTermField   = new Field("long_term", "This is a test thisisaverylongwordandmakessurethisfails where foo is highlighed and should be highlighted", type);
            Field noLongTermField = new Field("no_long_term", "This is a test where foo is highlighed and should be highlighted", type);

            doc.Add(longTermField);
            doc.Add(noLongTermField);
            writer.AddDocument(doc);
            FastVectorHighlighter highlighter = new FastVectorHighlighter();
            IndexReader           reader      = DirectoryReader.Open(writer, true);
            int    docId = 0;
            String field = "no_long_term";

            // Case 1: three MUST terms; centered fragment of 18 chars covers the hits.
            {
                BooleanQuery query = new BooleanQuery();
                query.Add(new TermQuery(new Term(field, "test")), Occur.MUST);
                query.Add(new TermQuery(new Term(field, "foo")), Occur.MUST);
                query.Add(new TermQuery(new Term(field, "highlighed")), Occur.MUST);
                FieldQuery fieldQuery    = highlighter.GetFieldQuery(query, reader);
                String[]   bestFragments = highlighter.GetBestFragments(fieldQuery, reader,
                                                                        docId, field, 18, 1);
                // highlighted results are centered
                assertEquals(1, bestFragments.Length);
                assertEquals("<b>foo</b> is <b>highlighed</b> and", bestFragments[0]);
            }
            // Case 2: sloppy phrase (slop 5) inside a boolean query; the phrase span
            // is too wide for an 18-char fragment but fits into 30 chars.
            {
                BooleanQuery query = new BooleanQuery();
                PhraseQuery  pq    = new PhraseQuery();
                pq.Add(new Term(field, "test"));
                pq.Add(new Term(field, "foo"));
                pq.Add(new Term(field, "highlighed"));
                pq.Slop = (5);
                query.Add(new TermQuery(new Term(field, "foo")), Occur.MUST);
                query.Add(pq, Occur.MUST);
                query.Add(new TermQuery(new Term(field, "highlighed")), Occur.MUST);
                FieldQuery fieldQuery    = highlighter.GetFieldQuery(query, reader);
                String[]   bestFragments = highlighter.GetBestFragments(fieldQuery, reader,
                                                                        docId, field, 18, 1);
                // highlighted results are centered
                assertEquals(0, bestFragments.Length);
                bestFragments = highlighter.GetBestFragments(fieldQuery, reader,
                                                             docId, field, 30, 1);
                // highlighted results are centered
                assertEquals(1, bestFragments.Length);
                assertEquals("a <b>test</b> where <b>foo</b> is <b>highlighed</b> and", bestFragments[0]);
            }
            // Case 3: the same sloppy phrase standing alone (slop 3) behaves the same.
            {
                PhraseQuery query = new PhraseQuery();
                query.Add(new Term(field, "test"));
                query.Add(new Term(field, "foo"));
                query.Add(new Term(field, "highlighed"));
                query.Slop = (3);
                FieldQuery fieldQuery    = highlighter.GetFieldQuery(query, reader);
                String[]   bestFragments = highlighter.GetBestFragments(fieldQuery, reader,
                                                                        docId, field, 18, 1);
                // highlighted results are centered
                assertEquals(0, bestFragments.Length);
                bestFragments = highlighter.GetBestFragments(fieldQuery, reader,
                                                             docId, field, 30, 1);
                // highlighted results are centered
                assertEquals(1, bestFragments.Length);
                assertEquals("a <b>test</b> where <b>foo</b> is <b>highlighed</b> and", bestFragments[0]);
            }
            // Case 4: phrase ends with "highlighted" (not "highlighed"); even with a
            // huge slop the phrase order does not occur, so nothing is highlighted.
            {
                PhraseQuery query = new PhraseQuery();
                query.Add(new Term(field, "test"));
                query.Add(new Term(field, "foo"));
                query.Add(new Term(field, "highlighted"));
                query.Slop = (30);
                FieldQuery fieldQuery    = highlighter.GetFieldQuery(query, reader);
                String[]   bestFragments = highlighter.GetBestFragments(fieldQuery, reader,
                                                                        docId, field, 18, 1);
                assertEquals(0, bestFragments.Length);
            }
            // Case 5: the sloppy phrase nested two levels deep still highlights once
            // the fragment is wide enough.
            {
                BooleanQuery query = new BooleanQuery();
                PhraseQuery  pq    = new PhraseQuery();
                pq.Add(new Term(field, "test"));
                pq.Add(new Term(field, "foo"));
                pq.Add(new Term(field, "highlighed"));
                pq.Slop = (5);
                BooleanQuery inner = new BooleanQuery();
                inner.Add(pq, Occur.MUST);
                inner.Add(new TermQuery(new Term(field, "foo")), Occur.MUST);
                query.Add(inner, Occur.MUST);
                query.Add(pq, Occur.MUST);
                query.Add(new TermQuery(new Term(field, "highlighed")), Occur.MUST);
                FieldQuery fieldQuery    = highlighter.GetFieldQuery(query, reader);
                String[]   bestFragments = highlighter.GetBestFragments(fieldQuery, reader,
                                                                        docId, field, 18, 1);
                assertEquals(0, bestFragments.Length);

                bestFragments = highlighter.GetBestFragments(fieldQuery, reader,
                                                             docId, field, 30, 1);
                // highlighted results are centered
                assertEquals(1, bestFragments.Length);
                assertEquals("a <b>test</b> where <b>foo</b> is <b>highlighed</b> and", bestFragments[0]);
            }

            // Case 6: a term longer than the fragment size (18) is still returned,
            // fully highlighted, as its own fragment.
            field = "long_term";
            {
                BooleanQuery query = new BooleanQuery();
                query.Add(new TermQuery(new Term(field,
                                                 "thisisaverylongwordandmakessurethisfails")), Occur.MUST);
                query.Add(new TermQuery(new Term(field, "foo")), Occur.MUST);
                query.Add(new TermQuery(new Term(field, "highlighed")), Occur.MUST);
                FieldQuery fieldQuery    = highlighter.GetFieldQuery(query, reader);
                String[]   bestFragments = highlighter.GetBestFragments(fieldQuery, reader,
                                                                        docId, field, 18, 1);
                // highlighted results are centered
                assertEquals(1, bestFragments.Length);
                assertEquals("<b>thisisaverylongwordandmakessurethisfails</b>",
                             bestFragments[0]);
            }
            reader.Dispose();
            writer.Dispose();
            dir.Dispose();
        }
        /// <summary>
        /// Verifies that FastVectorHighlighter highlights the full stored text when a
        /// phrase matches either through the original token chain or through a
        /// synonym token injected at the same position, and when both phrases are
        /// OR-ed together.
        /// </summary>
        public void TestBooleanPhraseWithSynonym()
        {
            Directory   directory   = NewDirectory();
            IndexWriter indexWriter = new IndexWriter(directory, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)));

            // Field type with full term vectors, as required by the highlighter.
            FieldType fieldType = new FieldType(TextField.TYPE_NOT_STORED);
            fieldType.StoreTermVectorOffsets = true;
            fieldType.StoreTermVectorPositions = true;
            fieldType.StoreTermVectors = true;
            fieldType.Freeze();

            // "httpwwwfacebookcom" is stacked as a synonym (position increment 0)
            // on top of the "http" token.
            Token synonym = new Token("httpwwwfacebookcom", 6, 29);
            synonym.PositionIncrement = 0;
            CannedTokenStream tokenStream = new CannedTokenStream(
                new Token("test", 0, 4),
                new Token("http", 6, 10),
                synonym,
                new Token("www", 13, 16),
                new Token("facebook", 17, 25),
                new Token("com", 26, 29)
                );

            Document document = new Document();
            document.Add(new Field("field", tokenStream, fieldType));
            document.Add(new StoredField("field", "Test: http://www.facebook.com"));
            indexWriter.AddDocument(document);

            FastVectorHighlighter highlighter = new FastVectorHighlighter();
            IndexReader           indexReader = DirectoryReader.Open(indexWriter, true);
            int docId = 0;

            // query1: phrase through the original token chain — matches.
            PhraseQuery phrase1 = new PhraseQuery();
            phrase1.Add(new Term("field", "test"));
            phrase1.Add(new Term("field", "http"));
            phrase1.Add(new Term("field", "www"));
            phrase1.Add(new Term("field", "facebook"));
            phrase1.Add(new Term("field", "com"));
            FieldQuery fieldQuery = highlighter.GetFieldQuery(phrase1, indexReader);
            String[] fragments = highlighter.GetBestFragments(fieldQuery, indexReader, docId, "field", 54, 1);
            assertEquals("<b>Test: http://www.facebook.com</b>", fragments[0]);

            // query2: phrase through the synonym token — also matches.
            PhraseQuery phrase2 = new PhraseQuery();
            phrase2.Add(new Term("field", "test"));
            phrase2.Add(new Term("field", "httpwwwfacebookcom"));
            phrase2.Add(new Term("field", "www"));
            phrase2.Add(new Term("field", "facebook"));
            phrase2.Add(new Term("field", "com"));
            fieldQuery = highlighter.GetFieldQuery(phrase2, indexReader);
            fragments  = highlighter.GetBestFragments(fieldQuery, indexReader, docId, "field", 54, 1);
            assertEquals("<b>Test: http://www.facebook.com</b>", fragments[0]);

            // query3: OR of query1 and query2 — still matches.
            BooleanQuery booleanQuery = new BooleanQuery();
            booleanQuery.Add(phrase1, Occur.SHOULD);
            booleanQuery.Add(phrase2, Occur.SHOULD);
            fieldQuery = highlighter.GetFieldQuery(booleanQuery, indexReader);
            fragments  = highlighter.GetBestFragments(fieldQuery, indexReader, docId, "field", 54, 1);
            assertEquals("<b>Test: http://www.facebook.com</b>", fragments[0]);

            indexReader.Dispose();
            indexWriter.Dispose();
            directory.Dispose();
        }
        // ----- 示例 #12 (Example #12; the stray "0" below it was a vote count from the scraped page) -----
        /// <summary>
        /// Builds the in-memory domain/range lookup dictionaries from the Lucene
        /// properties index: first maps each property id to its domain and range
        /// type ids (plus the inverted type-to-property views), then derives, for
        /// every property, the sets of outgoing/incoming property ids reachable
        /// through its domain types and through its range types.
        /// </summary>
        private void BuildDictionaries()
        {
            var propertyIdDomainsDictList = new Dictionary<int, HashSet<int>>();
            var propertyIdRangesDictList  = new Dictionary<int, HashSet<int>>();
            var logger = Logger.Logger.Init();

            logger.Info("Building Inverted Properties Domain and Range Dictionary");

            // Scan every document in the properties index and collect each
            // property's domain and range type ids.
            using (var luceneDirectory = FSDirectory.Open(_propertiesIndexPath)) {
                using var luceneDirectoryReader = DirectoryReader.Open(luceneDirectory);
                var docCount = luceneDirectoryReader.MaxDoc;
                for (var i = 0; i < docCount; i++)
                {
                    var doc      = luceneDirectoryReader.Document(i);
                    var property = doc.MapProperty();
                    propertyIdDomainsDictList.AddSafe(property.Id.ToInt(), property.Domain);
                    propertyIdRangesDictList.AddSafe(property.Id.ToInt(), property.Range);
                }
            }

            _propertyIdDomainTypesDictionary  = propertyIdDomainsDictList.ToArrayDictionary();
            _propertyIdRangeTypesDictionary   = propertyIdRangesDictList.ToArrayDictionary();
            _typeIdDomainPropertiesDictionary = _propertyIdDomainTypesDictionary.InvertDictionary();
            _typeIdRangePropertiesDictionary  = _propertyIdRangeTypesDictionary.InvertDictionary();

            var propertyDomainOutgoingPropertiesIds = new Dictionary<int, HashSet<int>>();
            var propertyDomainIncomingPropertiesIds = new Dictionary<int, HashSet<int>>();
            var propertyRangeOutgoingPropertiesIds  = new Dictionary<int, HashSet<int>>();
            var propertyRangeIncomingPropertiesIds  = new Dictionary<int, HashSet<int>>();

            // For each property, follow its domain types to the properties that use
            // those types as domain (outgoing) or range (incoming).
            // Note: the original re-checked ContainsKey on the dictionary whose keys
            // are being iterated — redundant, removed. TryGetValue avoids the
            // ContainsKey + indexer double lookup.
            foreach (var entry in _propertyIdDomainTypesDictionary)
            {
                var outgoing = new HashSet<int>();
                var incoming = new HashSet<int>();
                propertyDomainOutgoingPropertiesIds[entry.Key] = outgoing;
                propertyDomainIncomingPropertiesIds[entry.Key] = incoming;

                foreach (var domainId in entry.Value)
                {
                    if (_typeIdDomainPropertiesDictionary.TryGetValue(domainId, out var domainProperties))
                    {
                        outgoing.AddAll(domainProperties);
                    }

                    if (_typeIdRangePropertiesDictionary.TryGetValue(domainId, out var rangeProperties))
                    {
                        incoming.AddAll(rangeProperties);
                    }
                }
            }

            // Same derivation, but starting from each property's range types.
            foreach (var entry in _propertyIdRangeTypesDictionary)
            {
                var outgoing = new HashSet<int>();
                var incoming = new HashSet<int>();
                propertyRangeOutgoingPropertiesIds[entry.Key] = outgoing;
                propertyRangeIncomingPropertiesIds[entry.Key] = incoming;

                foreach (var rangeId in entry.Value)
                {
                    if (_typeIdDomainPropertiesDictionary.TryGetValue(rangeId, out var domainProperties))
                    {
                        outgoing.AddAll(domainProperties);
                    }

                    if (_typeIdRangePropertiesDictionary.TryGetValue(rangeId, out var rangeProperties))
                    {
                        incoming.AddAll(rangeProperties);
                    }
                }
            }

            _propertyDomainOutgoingPropertiesIds = propertyDomainOutgoingPropertiesIds.ToArrayDictionary();
            _propertyDomainIncomingPropertiesIds = propertyDomainIncomingPropertiesIds.ToArrayDictionary();
            _propertyRangeOutgoingPropertiesIds  = propertyRangeOutgoingPropertiesIds.ToArrayDictionary();
            _propertyRangeIncomingPropertiesIds  = propertyRangeIncomingPropertiesIds.ToArrayDictionary();

            logger.Info("InMemory Domain Range Query Engine Complete");
        }
        // ----- 示例 #13 (Example #13; the stray "0" below it was a vote count from the scraped page) -----
        /// <summary>
        /// Verifies that after <c>spellChecker.Dispose()</c> every subsequent
        /// operation throws <see cref="ObjectDisposedException"/>, and that all
        /// searchers opened along the way are closed.
        /// </summary>
        public void TestClose()
        {
            using (IndexReader r = DirectoryReader.Open(userindex))
            {
                spellChecker.ClearIndex();
                string field = "field1";

                // Adding a second field's words opens exactly one more searcher.
                Addwords(r, spellChecker, "field1");
                int countAfterField1 = this.NumDoc();
                Addwords(r, spellChecker, "field2");
                int countAfterField2 = this.NumDoc();
                assertEquals(countAfterField2, countAfterField1 + 1);

                CheckCommonSuggestions(r);
                AssertLastSearcherOpen(4);

                spellChecker.Dispose();
                AssertSearchersClosed();
                // LUCENENET NOTE: Per MSDN, calling Dispose() multiple times
                // should be a safe operation. http://stackoverflow.com/a/5306896/181087
                // Certainly, there shouldn't be a problem with calling Dispose() within
                // a using block if you decide to free up resources early. For that
                // reason the Java test's "second close() must throw" check is omitted.

                // Runs an operation against the disposed spell checker and asserts
                // that it throws ObjectDisposedException. A fail() inside the try
                // throws an assertion exception, which is not caught here and
                // therefore propagates as intended.
                void ExpectDisposed(Action operation)
                {
                    try
                    {
                        operation();
                        fail("spellchecker was already closed");
                    }
                    catch (ObjectDisposedException)
                    {
                        // expected
                    }
                }

                ExpectDisposed(() => CheckCommonSuggestions(r));
                ExpectDisposed(() => spellChecker.ClearIndex());
                ExpectDisposed(() => spellChecker.IndexDictionary(new LuceneDictionary(r, field), NewIndexWriterConfig(TEST_VERSION_CURRENT, null), false));
                ExpectDisposed(() => spellChecker.SetSpellIndex(spellindex));

                assertEquals(4, searchers.Count);
                AssertSearchersClosed();
            }
        }
        // ----- 示例 #14 (Example #14; the stray "0" below it was a vote count from the scraped page) -----
        /// <summary>
        /// Exercises each <see cref="SuggestMode"/> against the "field1" word set
        /// and checks both the number of suggestions and the top suggestion.
        /// </summary>
        public void TestSuggestModes()
        {
            using (IndexReader reader = DirectoryReader.Open(userindex))
            {
                spellChecker.ClearIndex();
                Addwords(reader, spellChecker, "field1");

                // Runs one SuggestSimilar call and verifies the result count and,
                // when non-null, the top suggestion.
                void AssertSuggestions(string word, int numSug, SuggestMode mode, int expectedCount, string expectedFirst)
                {
                    string[] similar = spellChecker.SuggestSimilar(word, numSug, reader, "field1", mode);
                    assertEquals(expectedCount, similar.Length);
                    if (expectedFirst != null)
                    {
                        assertEquals(expectedFirst, similar[0]);
                    }
                }

                // SUGGEST_WHEN_NOT_IN_INDEX: indexed words are returned as-is.
                AssertSuggestions("eighty", 2, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX, 1, "eighty");
                AssertSuggestions("eight", 2, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX, 1, "eight");

                // SUGGEST_MORE_POPULAR: only more frequent alternatives qualify.
                AssertSuggestions("eighty", 5, SuggestMode.SUGGEST_MORE_POPULAR, 5, "eight");
                AssertSuggestions("twenty", 5, SuggestMode.SUGGEST_MORE_POPULAR, 1, "twenty-one");
                AssertSuggestions("eight", 5, SuggestMode.SUGGEST_MORE_POPULAR, 0, null);

                // SUGGEST_ALWAYS: alternatives are returned regardless of popularity.
                AssertSuggestions("eighty", 5, SuggestMode.SUGGEST_ALWAYS, 5, "eight");
                AssertSuggestions("eight", 5, SuggestMode.SUGGEST_ALWAYS, 5, "eighty");
            }
        }
        /// <summary>
        /// Performs the search.
        /// </summary>
        /// <param name="options">Search options.</param>
        /// <param name="safeSearch">Whether to escape the keywords before parsing (safe search).</param>
        /// <returns>The result collection; TotalHits counts all filtered matches before paging.</returns>
        private ILuceneSearchResultCollection PerformSearch(SearchOptions options, bool safeSearch)
        {
            // Result collection
            ILuceneSearchResultCollection results = new LuceneSearchResultCollection();

            using var reader = DirectoryReader.Open(_directory);
            var searcher = new IndexSearcher(reader);

            // Safe search: escape Lucene query syntax in the user-supplied keywords.
            if (safeSearch)
            {
                options.Keywords = QueryParserBase.Escape(options.Keywords);
            }

            Query query;
            if (options.Fields.Count == 1)
            {
                // Single-field search
                var queryParser = new QueryParser(Lucene.Net.Util.LuceneVersion.LUCENE_48, options.Fields[0], _analyzer);
                query = queryParser.Parse(options.Keywords);
            }
            else
            {
                // Multi-field search with per-field boosts
                var multiFieldQueryParser = new MultiFieldQueryParser(Lucene.Net.Util.LuceneVersion.LUCENE_48, options.Fields.ToArray(), _analyzer, options.Boosts);
                query = GetFuzzyquery(multiFieldQueryParser, options.Keywords);
            }

            // Sort by relevance first, then by any requested string fields.
            var sortFields = new List<SortField>
            {
                SortField.FIELD_SCORE
            };
            sortFields.AddRange(options.OrderBy.Select(sortField => new SortField(sortField, SortFieldType.STRING)));
            var sort = new Sort(sortFields.ToArray());

            // Optional type filter. A plain Func replaces the original
            // Expression<Func<...>> that was compiled immediately anyway.
            Func<ScoreDoc, bool> predicate = _ => true;
            if (options.Type != null)
            {
                predicate = m => options.Type.AssemblyQualifiedName == searcher.Doc(m.Doc).Get("Type");
            }

            // Materialize the filtered hits once. The original kept a deferred
            // Where() and enumerated it three times (Count(), Skip/Take, ToList()),
            // re-running the predicate — and its searcher.Doc() index read — on
            // every pass.
            var filtered = searcher.Search(query, null, options.MaximumNumberOfHits, sort, true, true)
                                   .ScoreDocs
                                   .Where(predicate)
                                   .ToList();

            // TotalHits counts all filtered matches, before paging (as before).
            results.TotalHits = filtered.Count;

            // Paging
            IEnumerable<ScoreDoc> page = filtered;
            if (options.Skip.HasValue)
            {
                page = page.Skip(options.Skip.Value);
            }
            if (options.Take.HasValue)
            {
                page = page.Take(options.Take.Value);
            }

            // Build the result collection
            foreach (var match in page)
            {
                var doc = searcher.Doc(match.Doc);
                results.Results.Add(new LuceneSearchResult()
                {
                    Score    = match.Score,
                    Document = doc
                });
            }

            return(results);
        }
示例#16
0
        /// <summary>
        /// Verifies that tweaking MockFSDirectory's buffer sizes between index
        /// operations does not affect search correctness after deletions.
        /// </summary>
        public virtual void TestSetBufferSize()
        {
            var indexDir = CreateTempDir("testSetBufferSize");
            var dir      = new MockFSDirectory(indexDir, Random);

            try
            {
                // Fresh index using a non-compound log merge policy.
                var writer = new IndexWriter(
                    dir,
                    new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))
                    .SetOpenMode(OpenMode.CREATE)
                    .SetMergePolicy(NewLogMergePolicy(false)));

                // Index 37 docs, each with a shared "content" token ("aaa bbb ccc")
                // and a unique "id".
                for (int i = 0; i < 37; i++)
                {
                    var doc = new Document();
                    doc.Add(NewTextField("content", "aaa bbb ccc ddd" + i, Field.Store.YES));
                    doc.Add(NewTextField("id", "" + i, Field.Store.YES));
                    writer.AddDocument(doc);
                }

                // Forget inputs opened during indexing so only search-time inputs
                // are affected by the tweaks below.
                dir.AllIndexInputs.Clear();

                IndexReader reader = DirectoryReader.Open(writer, true);
                var         aaa    = new Term("content", "aaa");
                var         bbb    = new Term("content", "bbb");
                reader.Dispose();

                // Delete one doc, reopen an NRT reader, expect 36 hits.
                dir.TweakBufferSizes();
                writer.DeleteDocuments(new Term("id", "0"));
                reader = DirectoryReader.Open(writer, true);
                var searcher = NewSearcher(reader);
                var hits     = searcher.Search(new TermQuery(bbb), null, 1000).ScoreDocs;
                dir.TweakBufferSizes();
                Assert.AreEqual(36, hits.Length);

                reader.Dispose();

                // Delete a second doc and re-verify hit counts, tweaking buffer
                // sizes before and after each search.
                dir.TweakBufferSizes();
                writer.DeleteDocuments(new Term("id", "4"));
                reader   = DirectoryReader.Open(writer, true);
                searcher = NewSearcher(reader);

                hits = searcher.Search(new TermQuery(bbb), null, 1000).ScoreDocs;
                dir.TweakBufferSizes();
                Assert.AreEqual(35, hits.Length);
                dir.TweakBufferSizes();
                hits = searcher.Search(new TermQuery(new Term("id", "33")), null, 1000).ScoreDocs;
                dir.TweakBufferSizes();
                Assert.AreEqual(1, hits.Length);
                hits = searcher.Search(new TermQuery(aaa), null, 1000).ScoreDocs;
                dir.TweakBufferSizes();
                Assert.AreEqual(35, hits.Length);
                writer.Dispose();
                reader.Dispose();
            }
            finally
            {
                indexDir.Delete(true);
            }
        }
示例#17
0
        /// <summary>
        /// Command-line entry point: runs a TREC quality benchmark against an
        /// existing index. Expected args: topicsFile qrelsFile submissionFile
        /// indexDir [querySpec], where querySpec is any combination of
        /// T (title), D (description), N (narrative); default "T".
        /// </summary>
        public static void Main(string[] args)
        {
            if (args.Length < 4 || args.Length > 5)
            {
                // LUCENENET specific - our wrapper console shows correct usage
                throw new ArgumentException();
                //Console.Error.WriteLine("Usage: QueryDriver <topicsFile> <qrelsFile> <submissionFile> <indexDir> [querySpec]");
                //Console.Error.WriteLine("topicsFile: input file containing queries");
                //Console.Error.WriteLine("qrelsFile: input file containing relevance judgements");
                //Console.Error.WriteLine("submissionFile: output submission file for trec_eval");
                //Console.Error.WriteLine("indexDir: index directory");
                //Console.Error.WriteLine("querySpec: string composed of fields to use in query consisting of T=title,D=description,N=narrative:");
                //Console.Error.WriteLine("\texample: TD (query on Title + Description). The default is T (title only)");
                //Environment.Exit(1);
            }

            // NOTE(review): the StreamWriter wrapped by submitLog is never flushed
            // or disposed here — confirm SubmissionReport flushes it, otherwise the
            // submission file may be truncated.
            FileInfo         topicsFile = new FileInfo(args[0]);
            FileInfo         qrelsFile  = new FileInfo(args[1]);
            SubmissionReport submitLog  = new SubmissionReport(new StreamWriter(new FileStream(args[2], FileMode.Create, FileAccess.Write), Encoding.UTF8 /* huh, no nio.Charset ctor? */), "lucene");

            using (Store.FSDirectory dir = Store.FSDirectory.Open(new DirectoryInfo(args[3])))
                using (IndexReader reader = DirectoryReader.Open(dir))
                {
                    string        fieldSpec = args.Length == 5 ? args[4] : "T"; // default to Title-only if not specified.
                    IndexSearcher searcher  = new IndexSearcher(reader);

                    int    maxResults   = 1000;
                    string docNameField = "docname";

                    TextWriter logger = Console.Out; //new StreamWriter(Console, Encoding.GetEncoding(0));

                    // use trec utilities to read trec topics into quality queries
                    TrecTopicsReader qReader = new TrecTopicsReader();
                    QualityQuery[]   qqs     = qReader.ReadQueries(IOUtils.GetDecodingReader(topicsFile, Encoding.UTF8));

                    // prepare judge, with trec utilities that read from a QRels file
                    IJudge judge = new TrecJudge(IOUtils.GetDecodingReader(qrelsFile, Encoding.UTF8));

                    // validate topics & judgments match each other
                    judge.ValidateData(qqs, logger);

                    // Translate the querySpec letters into index field names.
                    ISet <string> fieldSet = new JCG.HashSet <string>();
                    if (fieldSpec.IndexOf('T') >= 0)
                    {
                        fieldSet.Add("title");
                    }
                    if (fieldSpec.IndexOf('D') >= 0)
                    {
                        fieldSet.Add("description");
                    }
                    if (fieldSpec.IndexOf('N') >= 0)
                    {
                        fieldSet.Add("narrative");
                    }

                    // set the parsing of quality queries into Lucene queries.
                    IQualityQueryParser qqParser = new SimpleQQParser(fieldSet.ToArray(), "body");

                    // run the benchmark
                    QualityBenchmark qrun = new QualityBenchmark(qqs, qqParser, searcher, docNameField);
                    qrun.MaxResults = maxResults;
                    QualityStats[] stats = qrun.Execute(judge, submitLog, logger);

                    // print an avarage sum of the results
                    QualityStats avg = QualityStats.Average(stats);
                    avg.Log("SUMMARY", 2, logger, "  ");
                }
        }
示例#18
0
        /// <summary>
        /// Searches the index for <paramref name="query"/>, returning up to 10
        /// top-scoring documents with highlighted fragments for each field in
        /// <paramref name="requiredHighlights"/>. Returns an empty list on failure.
        /// </summary>
        public List <ResultData> GetResults(Query query, List <RequiredHighlight> requiredHighlights)
        {
            try
            {
                // BUG FIX: dispose both the directory and the reader on every path.
                // The original never disposed the SimpleFSDirectory at all, and
                // leaked the reader whenever an exception escaped the loop.
                using (Directory indexDir = new SimpleFSDirectory(ConfigurationManager.IndexDir))
                using (DirectoryReader reader = DirectoryReader.Open(indexDir))
                {
                    IndexSearcher isr = new IndexSearcher(reader);

                    // Collect the 10 highest-scoring hits.
                    TopScoreDocCollector collector = TopScoreDocCollector.Create(10, true);

                    List <ResultData> results = new List <ResultData>();

                    isr.Search(query, collector);
                    ScoreDoc[] hits = collector.GetTopDocs().ScoreDocs;

                    foreach (ScoreDoc sd in hits)
                    {
                        Document doc = isr.Doc(sd.Doc);

                        // Join all stored "keyword" values into one space-separated string.
                        string keywords = "";
                        foreach (string keyword in doc.GetValues("keyword"))
                        {
                            keywords += keyword.Trim() + " ";
                        }
                        keywords = keywords.Trim();

                        string title    = doc.Get("title");
                        string location = doc.Get("filename");
                        string author   = doc.Get("author");
                        int    category = Int32.Parse(doc.Get("category"));

                        // Build a best-fragment highlight per requested field; a
                        // failure on one field must not abort the whole result.
                        string highlight = "";
                        string text      = GetDocumentText(location);
                        foreach (var item in requiredHighlights)
                        {
                            Highlighter hi = new Highlighter(new QueryScorer(query, reader, item.FieldName));
                            try
                            {
                                highlight += hi.GetBestFragment(analyzer, item.FieldName, text);
                            }
                            catch (Exception e)
                            {
                                Console.WriteLine(e.Message + "  on" + e.StackTrace);
                            }
                        }

                        results.Add(new ResultData()
                        {
                            Title      = title,
                            Filename   = location,
                            Keywords   = keywords,
                            CategoryId = category,
                            Author     = author,
                            Highlight  = highlight,
                        });
                    }

                    return results;
                }
            }
            catch (Exception e)
            {
                // Best-effort API: log and return an empty list instead of throwing.
                Console.WriteLine(e.Source);
                return new List <ResultData>();
            }
        }
        /// <summary>
        /// Verifies grouped facet counting in the presence of deleted documents:
        /// a deleted "ams" doc must not contribute to facet counts, and docs
        /// without the group field count as "missing".
        /// </summary>
        public void TestMVGroupedFacetingWithDeletes()
        {
            string    groupField = "hotel";
            FieldType customType = new FieldType();

            customType.IsStored = (true);

            // NoMergePolicy keeps deletes live in their original segments so the
            // collector must handle them rather than rely on merges purging them.
            Directory         dir = NewDirectory();
            RandomIndexWriter w   = new RandomIndexWriter(
                Random(),
                dir,
                NewIndexWriterConfig(TEST_VERSION_CURRENT,
                                     new MockAnalyzer(Random())).SetMergePolicy(NoMergePolicy.COMPOUND_FILES));
            bool useDv = false;

            // Cannot assert this since we use NoMergePolicy:
            w.DoRandomForceMergeAssert = (false);

            // 0: no group field -> counts toward TotalMissingCount
            Document doc = new Document();

            doc.Add(new StringField("x", "x", Field.Store.NO));
            w.AddDocument(doc);

            // 1: added then deleted below (via the airport:ams term) — must be
            // excluded from facet counts.
            doc = new Document();
            AddField(doc, groupField, "a", useDv);
            doc.Add(new StringField("airport", "ams", Field.Store.NO));
            w.AddDocument(doc);

            w.Commit();
            w.DeleteDocuments(new TermQuery(new Term("airport", "ams")));

            // 2
            doc = new Document();
            AddField(doc, groupField, "a", useDv);
            doc.Add(new StringField("airport", "ams", Field.Store.NO));
            w.AddDocument(doc);

            // 3
            doc = new Document();
            AddField(doc, groupField, "a", useDv);
            doc.Add(new StringField("airport", "dus", Field.Store.NO));

            w.AddDocument(doc);

            // 4
            doc = new Document();
            AddField(doc, groupField, "b", useDv);
            doc.Add(new StringField("airport", "ams", Field.Store.NO));
            w.AddDocument(doc);

            // 5
            doc = new Document();
            AddField(doc, groupField, "b", useDv);
            doc.Add(new StringField("airport", "ams", Field.Store.NO));
            w.AddDocument(doc);

            // 6
            doc = new Document();
            AddField(doc, groupField, "b", useDv);
            doc.Add(new StringField("airport", "ams", Field.Store.NO));
            w.AddDocument(doc);
            w.Commit();

            // 7: another doc without the group field
            doc = new Document();
            doc.Add(new StringField("x", "x", Field.Store.NO));
            w.AddDocument(doc);
            w.Commit();

            w.Dispose();
            IndexSearcher indexSearcher = NewSearcher(DirectoryReader.Open(dir));
            AbstractGroupFacetCollector groupedAirportFacetCollector = CreateRandomCollector(groupField, "airport", null, true);

            indexSearcher.Search(new MatchAllDocsQuery(), groupedAirportFacetCollector);
            TermGroupFacetCollector.GroupedFacetResult airportResult = groupedAirportFacetCollector.MergeSegmentResults(10, 0, false);
            // Per-group counting: group a -> {ams, dus}, group b -> {ams} = 3 total;
            // one group (the x-only docs) has no airport value.
            assertEquals(3, airportResult.TotalCount);
            assertEquals(1, airportResult.TotalMissingCount);

            IList <TermGroupFacetCollector.FacetEntry> entries = airportResult.GetFacetEntries(0, 10);

            assertEquals(2, entries.size());
            assertEquals("ams", entries[0].Value.Utf8ToString());
            assertEquals(2, entries[0].Count);
            assertEquals("dus", entries[1].Value.Utf8ToString());
            assertEquals(1, entries[1].Count);

            indexSearcher.IndexReader.Dispose();
            dir.Dispose();
        }
示例#20
0
        /// <summary>
        /// Exercises a per-field codec configuration (standard codec for field1,
        /// pulsing codec for field2) across commits, deletes, a force-merge, and
        /// NRT reader reopens, asserting doc counts stay consistent.
        /// </summary>
        public virtual void TestPerFieldCodec()
        {
            int NUM_DOCS = AtLeast(173);

            if (Verbose)
            {
                Console.WriteLine("TEST: NUM_DOCS=" + NUM_DOCS);
            }

            using BaseDirectoryWrapper dir = NewDirectory();
            dir.CheckIndexOnDispose        = false; // we use a custom codec provider
            using IndexWriter w            = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetCodec(new CustomPerFieldCodec()).SetMergePolicy(NewLogMergePolicy(3)));
            Documents.Document doc = new Documents.Document();
            // uses default codec:
            doc.Add(NewTextField("field1", "this field uses the standard codec as the test", Field.Store.NO));
            // uses pulsing codec:
            Field field2 = NewTextField("field2", "this field uses the pulsing codec as the test", Field.Store.NO);

            doc.Add(field2);

            Field idField = NewStringField("id", "", Field.Store.NO);

            doc.Add(idField);
            // Re-add the same Document instance with a fresh id each time,
            // committing every 10 docs to create multiple segments.
            for (int i = 0; i < NUM_DOCS; i++)
            {
                idField.SetStringValue("" + i);
                w.AddDocument(doc);
                if ((i + 1) % 10 == 0)
                {
                    w.Commit();
                }
            }
            if (Verbose)
            {
                Console.WriteLine("TEST: now delete id=77");
            }
            w.DeleteDocuments(new Term("id", "77"));

            // NRT reader sees the delete: one fewer live doc on both fields.
            using (IndexReader r = DirectoryReader.Open(w, true))
            {
                Assert.AreEqual(NUM_DOCS - 1, r.NumDocs);
                IndexSearcher s = NewSearcher(r);
                Assert.AreEqual(NUM_DOCS - 1, s.Search(new TermQuery(new Term("field1", "standard")), 1).TotalHits);
                Assert.AreEqual(NUM_DOCS - 1, s.Search(new TermQuery(new Term("field2", "pulsing")), 1).TotalHits);
            }

            if (Verbose)
            {
                Console.WriteLine("\nTEST: now delete 2nd doc");
            }
            w.DeleteDocuments(new Term("id", "44"));

            if (Verbose)
            {
                Console.WriteLine("\nTEST: now force merge");
            }
            w.ForceMerge(1);
            if (Verbose)
            {
                Console.WriteLine("\nTEST: now open reader");
            }
            // After force-merging to one segment the deletes are purged, so
            // MaxDoc equals NumDocs.
            using (IndexReader r = DirectoryReader.Open(w, true))
            {
                Assert.AreEqual(NUM_DOCS - 2, r.MaxDoc);
                Assert.AreEqual(NUM_DOCS - 2, r.NumDocs);
                IndexSearcher s = NewSearcher(r);
                Assert.AreEqual(NUM_DOCS - 2, s.Search(new TermQuery(new Term("field1", "standard")), 1).TotalHits);
                Assert.AreEqual(NUM_DOCS - 2, s.Search(new TermQuery(new Term("field2", "pulsing")), 1).TotalHits);
                Assert.AreEqual(1, s.Search(new TermQuery(new Term("id", "76")), 1).TotalHits);
                Assert.AreEqual(0, s.Search(new TermQuery(new Term("id", "77")), 1).TotalHits);
                Assert.AreEqual(0, s.Search(new TermQuery(new Term("id", "44")), 1).TotalHits);

                if (Verbose)
                {
                    Console.WriteLine("\nTEST: now close NRT reader");
                }
            }
        }
示例#21
0
 private Lucene.Net.Index.IndexReader CreateReader()
 {
     // Open a read-only DirectoryReader over the search index's Lucene directory.
     var luceneIndex = (LuceneIndex)_searchIndex;
     return DirectoryReader.Open(luceneIndex.Directory, true);
 }
示例#22
0
        /// <summary>
        /// Exercises WordBreakSpellChecker.SuggestWordBreaks on compound number
        /// words, varying MaxChanges / MinSuggestionFrequency, including a term
        /// made of a surrogate-pair (2-char) codepoint.
        /// </summary>
        public void TestBreakingWords()
        {
            IndexReader ir = null;

            // BUG FIX: the original wrapped the body in
            // "catch (Exception e) { throw e; }", which only destroys the stack
            // trace; removed in favor of a plain try/finally.
            try
            {
                ir = DirectoryReader.Open(dir);
                WordBreakSpellChecker wbsp = new WordBreakSpellChecker();

                // "ninetynine" -> exactly one suggestion: "ninety" + "nine".
                {
                    Term term = new Term("numbers", "ninetynine");
                    wbsp.MaxChanges             = (1);
                    wbsp.MinBreakWordLength     = (1);
                    wbsp.MinSuggestionFrequency = (1);
                    SuggestWord[][] sw = wbsp.SuggestWordBreaks(term, 5, ir, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX, WordBreakSpellChecker.BreakSuggestionSortMethod.NUM_CHANGES_THEN_MAX_FREQUENCY);
                    assertTrue(sw.Length == 1);
                    assertTrue(sw[0].Length == 2);
                    assertTrue(sw[0][0].String.equals("ninety"));
                    assertTrue(sw[0][1].String.equals("nine"));
                    assertTrue(sw[0][0].Score == 1);
                    assertTrue(sw[0][1].Score == 1);
                }
                // "onethousand": vary MaxChanges and MinSuggestionFrequency and
                // check both the 2-word ("one thousand") and 3-word
                // ("one thou sand") breakdowns with their scores/frequencies.
                {
                    Term term = new Term("numbers", "onethousand");
                    wbsp.MaxChanges             = (1);
                    wbsp.MinBreakWordLength     = (1);
                    wbsp.MinSuggestionFrequency = (1);
                    SuggestWord[][] sw = wbsp.SuggestWordBreaks(term, 2, ir, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX, WordBreakSpellChecker.BreakSuggestionSortMethod.NUM_CHANGES_THEN_MAX_FREQUENCY);
                    assertTrue(sw.Length == 1);
                    assertTrue(sw[0].Length == 2);
                    assertTrue(sw[0][0].String.equals("one"));
                    assertTrue(sw[0][1].String.equals("thousand"));
                    assertTrue(sw[0][0].Score == 1);
                    assertTrue(sw[0][1].Score == 1);

                    wbsp.MaxChanges             = (2);
                    wbsp.MinSuggestionFrequency = (1);
                    sw = wbsp.SuggestWordBreaks(term, 1, ir, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX, WordBreakSpellChecker.BreakSuggestionSortMethod.NUM_CHANGES_THEN_MAX_FREQUENCY);
                    assertTrue(sw.Length == 1);
                    assertTrue(sw[0].Length == 2);

                    wbsp.MaxChanges             = (2);
                    wbsp.MinSuggestionFrequency = (2);
                    sw = wbsp.SuggestWordBreaks(term, 2, ir, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX, WordBreakSpellChecker.BreakSuggestionSortMethod.NUM_CHANGES_THEN_MAX_FREQUENCY);
                    assertTrue(sw.Length == 1);
                    assertTrue(sw[0].Length == 2);

                    wbsp.MaxChanges             = (2);
                    wbsp.MinSuggestionFrequency = (1);
                    sw = wbsp.SuggestWordBreaks(term, 2, ir, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX, WordBreakSpellChecker.BreakSuggestionSortMethod.NUM_CHANGES_THEN_MAX_FREQUENCY);
                    assertTrue(sw.Length == 2);
                    assertTrue(sw[0].Length == 2);
                    assertTrue(sw[0][0].String.equals("one"));
                    assertTrue(sw[0][1].String.equals("thousand"));
                    assertTrue(sw[0][0].Score == 1);
                    assertTrue(sw[0][1].Score == 1);
                    assertTrue(sw[0][1].Freq > 1);
                    assertTrue(sw[0][0].Freq > sw[0][1].Freq);
                    assertTrue(sw[1].Length == 3);
                    assertTrue(sw[1][0].String.equals("one"));
                    assertTrue(sw[1][1].String.equals("thou"));
                    assertTrue(sw[1][2].String.equals("sand"));
                    assertTrue(sw[1][0].Score == 2);
                    assertTrue(sw[1][1].Score == 2);
                    assertTrue(sw[1][2].Score == 2);
                    assertTrue(sw[1][0].Freq > 1);
                    assertTrue(sw[1][1].Freq == 1);
                    assertTrue(sw[1][2].Freq == 1);
                }
                // Longer compound needs more changes before any suggestion appears.
                {
                    Term term = new Term("numbers", "onethousandonehundredeleven");
                    wbsp.MaxChanges             = (3);
                    wbsp.MinBreakWordLength     = (1);
                    wbsp.MinSuggestionFrequency = (1);
                    SuggestWord[][] sw = wbsp.SuggestWordBreaks(term, 5, ir, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX, WordBreakSpellChecker.BreakSuggestionSortMethod.NUM_CHANGES_THEN_MAX_FREQUENCY);
                    assertTrue(sw.Length == 0);

                    wbsp.MaxChanges = (4);
                    sw = wbsp.SuggestWordBreaks(term, 5, ir, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX, WordBreakSpellChecker.BreakSuggestionSortMethod.NUM_CHANGES_THEN_MAX_FREQUENCY);
                    assertTrue(sw.Length == 1);
                    assertTrue(sw[0].Length == 5);

                    wbsp.MaxChanges = (5);
                    sw = wbsp.SuggestWordBreaks(term, 5, ir, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX, WordBreakSpellChecker.BreakSuggestionSortMethod.NUM_CHANGES_THEN_MAX_FREQUENCY);
                    assertTrue(sw.Length == 2);
                    assertTrue(sw[0].Length == 5);
                    assertTrue(sw[0][1].String.equals("thousand"));
                    assertTrue(sw[1].Length == 6);
                    assertTrue(sw[1][1].String.equals("thou"));
                    assertTrue(sw[1][2].String.equals("sand"));
                }
                {
                    //make sure we can handle 2-char codepoints
                    Term term = new Term("numbers", "\uD864\uDC79");
                    wbsp.MaxChanges             = (1);
                    wbsp.MinBreakWordLength     = (1);
                    wbsp.MinSuggestionFrequency = (1);
                    SuggestWord[][] sw = wbsp.SuggestWordBreaks(term, 5, ir, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX, WordBreakSpellChecker.BreakSuggestionSortMethod.NUM_CHANGES_THEN_MAX_FREQUENCY);
                    assertTrue(sw.Length == 0);
                }
            }
            finally
            {
                // Best-effort cleanup; ir is null if Open failed.
                try { ir?.Dispose(); } catch (Exception /*e1*/) { }
            }
        }
示例#23
0
        /// <summary>
        /// Searches the on-disk Lucene index (under ./lucene) for articles whose
        /// title or summary matches the cut keywords, returning the article ids
        /// for the requested page. <paramref name="totalHits"/> receives the total
        /// match count. Returns null for an empty key, an empty list on error.
        /// </summary>
        public static List <int> Search(string key, int page, int pageSize, out int totalHits)
        {
            totalHits = 0;
            if (string.IsNullOrEmpty(key))
            {
                return null;
            }
            key = key.Trim().ToLower();

            var rs = new List <int>();

            try
            {
                var indexDir = Path.Combine(System.IO.Directory.GetCurrentDirectory(), "lucene");

                if (System.IO.Directory.Exists(indexDir) == true)
                {
                    // BUG FIX: dispose every directory/reader we open; the original
                    // leaked an anonymous FSDirectory, both readers, and the
                    // no-lock directory.
                    using (var docDirectory = FSDirectory.Open(new DirectoryInfo(indexDir)))
                    using (var docReader = DirectoryReader.Open(docDirectory))
                    using (var searchDirectory = FSDirectory.Open(new DirectoryInfo(indexDir), NoLockFactory.GetNoLockFactory()))
                    using (var searchReader = DirectoryReader.Open(searchDirectory))
                    {
                        var docSearcher = new IndexSearcher(docReader);    // used to fetch stored docs
                        var searcher    = new IndexSearcher(searchReader); // used to run the query

                        // OR together a term query on title and summary for each cut word.
                        var booleanQuery = new BooleanQuery();
                        foreach (var word in CutKeyWord(key))
                        {
                            booleanQuery.Add(new TermQuery(new Term("title", word)), Occur.SHOULD);
                            booleanQuery.Add(new TermQuery(new Term("summary", word)), Occur.SHOULD);
                        }

                        var collector = TopScoreDocCollector.Create(1000, true);
                        searcher.Search(booleanQuery, null, collector);

                        // BUG FIX: GetTopDocs(start, howMany) takes a COUNT as its
                        // second argument, not an end index; the original asked for
                        // page*pageSize docs per page instead of pageSize.
                        var docs = collector.GetTopDocs((page - 1) * pageSize, pageSize).ScoreDocs;
                        totalHits = collector.TotalHits;
                        foreach (var d in docs)
                        {
                            var document  = docSearcher.Doc(d.Doc); // fetch the stored document
                            var articleId = document.Get("articleId");

                            if (int.TryParse(articleId, out int mid) == true)
                            {
                                rs.Add(mid);
                            }
                        }
                    }
                }
            }
            catch (Exception ex)
            {
                Console.WriteLine($"SearchMerchs ex={ex}");
            }

            return rs;
        }
示例#24
0
        /// <summary>
        /// Randomized round-trip test: each random string is split at a random
        /// codepoint boundary; SuggestWordBreaks must recover the split from the
        /// joined form, and SuggestWordCombinations must recover the joined form
        /// from the two halves.
        /// </summary>
        public void GRandom()
        {
            int numDocs = TestUtil.NextInt(Random(), (10 * RANDOM_MULTIPLIER),
                                           (100 * RANDOM_MULTIPLIER));
            Directory         dir    = null;
            RandomIndexWriter writer = null;
            IndexReader       ir     = null;

            // BUG FIX: the original wrapped the body in
            // "catch (Exception e) { throw e; }", which only destroys the stack
            // trace; removed in favor of a plain try/finally.
            try
            {
                dir    = NewDirectory();
                writer = new RandomIndexWriter(Random(), dir, new MockAnalyzer(Random(),
                                                                               MockTokenizer.WHITESPACE, false), Similarity, TimeZone);
                int             maxLength = TestUtil.NextInt(Random(), 5, 50);
                List <string>   originals = new List <string>(numDocs);
                List <string[]> breaks    = new List <string[]>(numDocs);
                for (int i = 0; i < numDocs; i++)
                {
                    // Generate a random string (simple ASCII or full Unicode)
                    // that satisfies GoodTestString.
                    string orig = "";
                    if (Random().nextBoolean())
                    {
                        while (!GoodTestString(orig))
                        {
                            orig = TestUtil.RandomSimpleString(Random(), maxLength);
                        }
                    }
                    else
                    {
                        while (!GoodTestString(orig))
                        {
                            orig = TestUtil.RandomUnicodeString(Random(), maxLength);
                        }
                    }
                    originals.Add(orig);
                    // Split at a random interior codepoint boundary (never mid-surrogate).
                    int totalLength = orig.CodePointCount(0, orig.Length);
                    int breakAt     = orig.OffsetByCodePoints(0,
                                                              TestUtil.NextInt(Random(), 1, totalLength - 1));
                    string[] broken = new string[2];
                    broken[0] = orig.Substring(0, breakAt - 0);
                    broken[1] = orig.Substring(breakAt);
                    breaks.Add(broken);
                    Document doc = new Document();
                    doc.Add(NewTextField("random_break", broken[0] + " " + broken[1],
                                         Field.Store.NO));
                    doc.Add(NewTextField("random_combine", orig, Field.Store.NO));
                    writer.AddDocument(doc);
                }
                writer.Commit();
                writer.Dispose();

                ir = DirectoryReader.Open(dir);
                WordBreakSpellChecker wbsp = new WordBreakSpellChecker();
                wbsp.MaxChanges             = (1);
                wbsp.MinBreakWordLength     = (1);
                wbsp.MinSuggestionFrequency = (1);
                wbsp.MaxCombineWordLength   = (maxLength);
                for (int i = 0; i < originals.size(); i++)
                {
                    string orig  = originals[i];
                    string left  = breaks[i][0];
                    string right = breaks[i][1];
                    // Breaking the joined form must yield the recorded split.
                    {
                        Term term = new Term("random_break", orig);

                        SuggestWord[][] sw = wbsp.SuggestWordBreaks(term, originals.size(),
                                                                    ir, SuggestMode.SUGGEST_ALWAYS,
                                                                    WordBreakSpellChecker.BreakSuggestionSortMethod.NUM_CHANGES_THEN_MAX_FREQUENCY);
                        bool failed = true;
                        foreach (SuggestWord[] sw1 in sw)
                        {
                            assertTrue(sw1.Length == 2);
                            if (sw1[0].String.equals(left) && sw1[1].String.equals(right))
                            {
                                failed = false;
                            }
                        }
                        assertFalse("Failed getting break suggestions\n >Original: "
                                    + orig + "\n >Left: " + left + "\n >Right: " + right, failed);
                    }
                    // Combining the two halves must yield the original string.
                    {
                        Term[] terms = { new Term("random_combine", left),
                                         new Term("random_combine", right) };
                        CombineSuggestion[] cs = wbsp.SuggestWordCombinations(terms,
                                                                              originals.size(), ir, SuggestMode.SUGGEST_ALWAYS);
                        bool failed = true;
                        foreach (CombineSuggestion cs1 in cs)
                        {
                            assertTrue(cs1.OriginalTermIndexes.Length == 2);
                            if (cs1.Suggestion.String.equals(left + right))
                            {
                                failed = false;
                            }
                        }
                        assertFalse("Failed getting combine suggestions\n >Original: "
                                    + orig + "\n >Left: " + left + "\n >Right: " + right, failed);
                    }
                }
            }
            finally
            {
                // Best-effort cleanup; each may be null (or, for writer, already
                // disposed) depending on where an exception occurred.
                try
                {
                    ir?.Dispose();
                }
                catch (Exception /*e1*/) { }
                try
                {
                    writer?.Dispose();
                }
                catch (Exception /*e1*/) { }
                try
                {
                    dir?.Dispose();
                }
                catch (Exception /*e1*/) { }
            }
        }
示例#25
0
        /// <summary>
        /// Round-trip test against an AzureDirectory: indexes 1000 random docs,
        /// counting phrases while indexing, then verifies search hit counts match,
        /// and finally deletes the blob container.
        /// </summary>
        public void TestReadAndWrite()
        {
            var connectionString = _connectionString ?? "UseDevelopmentStorage=true";

            var cloudStorageAccount = CloudStorageAccount.Parse(connectionString);

            const string containerName = "testcatalog";
            var          blobClient    = cloudStorageAccount.CreateCloudBlobClient();
            var          container     = blobClient.GetContainerReference(containerName);

            // Start from a clean container so the counts below are deterministic.
            container.DeleteIfExists();

            var azureDirectory = new AzureDirectory(cloudStorageAccount, containerName);

            var indexWriterConfig = new IndexWriterConfig(
                Lucene.Net.Util.LuceneVersion.LUCENE_48,
                new StandardAnalyzer(Lucene.Net.Util.LuceneVersion.LUCENE_48));

            // Expected hit counts, tallied while generating the documents.
            int dog = 0, cat = 0, car = 0;

            using (var indexWriter = new IndexWriter(azureDirectory, indexWriterConfig))
            {
                for (var iDoc = 0; iDoc < 1000; iDoc++)
                {
                    var bodyText = GeneratePhrase(40);
                    var doc      = new Document {
                        new TextField("id", DateTime.Now.ToFileTimeUtc() + "-" + iDoc, Field.Store.YES),
                        new TextField("Title", GeneratePhrase(10), Field.Store.YES),
                        new TextField("Body", bodyText, Field.Store.YES)
                    };
                    dog += bodyText.Contains(" dog ") ? 1 : 0;
                    cat += bodyText.Contains(" cat ") ? 1 : 0;
                    car += bodyText.Contains(" car ") ? 1 : 0;
                    indexWriter.AddDocument(doc);
                }

                Console.WriteLine("Total docs is {0}, {1} dog, {2} cat, {3} car", indexWriter.NumDocs, dog, cat, car);
            }
            try
            {
                // BUG FIX: the original caught Exception here and only logged it,
                // so failing assertions could never fail the test; it also never
                // disposed the reader. Let exceptions propagate and use "using".
                using (var ireader = DirectoryReader.Open(azureDirectory))
                {
                    for (var i = 0; i < 100; i++)
                    {
                        var searcher        = new IndexSearcher(ireader);
                        var searchForPhrase = SearchForPhrase(searcher, "dog");
                        Assert.AreEqual(dog, searchForPhrase);
                        searchForPhrase = SearchForPhrase(searcher, "cat");
                        Assert.AreEqual(cat, searchForPhrase);
                        searchForPhrase = SearchForPhrase(searcher, "car");
                        Assert.AreEqual(car, searchForPhrase);
                    }
                    Console.WriteLine("Tests passsed");
                }
            }
            finally
            {
                // check the container exists, and delete it
                Assert.IsTrue(container.Exists()); // check the container exists
                container.Delete();
            }
        }
        // ---- Example #26 ----
        /// <summary>
        /// Verifies <c>WordBreakSpellChecker.SuggestWordCombinations</c> on the "numbers"
        /// field: adjacent fragments ("hun"+"dred", "eight"+"y", ...) must combine into the
        /// expected suggestions with the expected scores, under both SUGGEST_ALWAYS and
        /// SUGGEST_WHEN_NOT_IN_INDEX modes.
        /// </summary>
        public void TestCombiningWords()
        {
            IndexReader ir = null;

            // BUG FIX: removed the pointless "catch (Exception e) { throw e; }" wrapper,
            // which reset the stack trace on failure without adding any handling.
            try
            {
                ir = DirectoryReader.Open(dir);
                WordBreakSpellChecker wbsp = new WordBreakSpellChecker();

                {
                    Term[] terms =
                    {
                        new Term("numbers", "one"),
                        new Term("numbers", "hun"),
                        new Term("numbers", "dred"),
                        new Term("numbers", "eight"),
                        new Term("numbers", "y"),
                        new Term("numbers", "eight"),
                    };
                    wbsp.MaxChanges             = (3);
                    wbsp.MaxCombineWordLength   = (20);
                    wbsp.MinSuggestionFrequency = (1);
                    CombineSuggestion[] cs = wbsp.SuggestWordCombinations(terms, 10, ir, SuggestMode.SUGGEST_ALWAYS);
                    assertTrue(cs.Length == 5);

                    // "hun" + "dred" -> "hundred" (indexes 1,2), single change.
                    assertTrue(cs[0].OriginalTermIndexes.Length == 2);
                    assertTrue(cs[0].OriginalTermIndexes[0] == 1);
                    assertTrue(cs[0].OriginalTermIndexes[1] == 2);
                    assertTrue(cs[0].Suggestion.String.equals("hundred"));
                    assertTrue(cs[0].Suggestion.Score == 1);

                    // "eight" + "y" -> "eighty" (indexes 3,4).
                    assertTrue(cs[1].OriginalTermIndexes.Length == 2);
                    assertTrue(cs[1].OriginalTermIndexes[0] == 3);
                    assertTrue(cs[1].OriginalTermIndexes[1] == 4);
                    assertTrue(cs[1].Suggestion.String.equals("eighty"));
                    assertTrue(cs[1].Suggestion.Score == 1);

                    // "y" + "eight" -> "yeight" (indexes 4,5).
                    assertTrue(cs[2].OriginalTermIndexes.Length == 2);
                    assertTrue(cs[2].OriginalTermIndexes[0] == 4);
                    assertTrue(cs[2].OriginalTermIndexes[1] == 5);
                    assertTrue(cs[2].Suggestion.String.equals("yeight"));
                    assertTrue(cs[2].Suggestion.Score == 1);

                    // The remaining two are three-way combinations (two changes each),
                    // in either order: "hundredeight" or "eightyeight".
                    for (int i = 3; i < 5; i++)
                    {
                        assertTrue(cs[i].OriginalTermIndexes.Length == 3);
                        assertTrue(cs[i].Suggestion.Score == 2);
                        assertTrue(
                            (cs[i].OriginalTermIndexes[0] == 1 &&
                             cs[i].OriginalTermIndexes[1] == 2 &&
                             cs[i].OriginalTermIndexes[2] == 3 &&
                             cs[i].Suggestion.String.equals("hundredeight")) ||
                            (cs[i].OriginalTermIndexes[0] == 3 &&
                             cs[i].OriginalTermIndexes[1] == 4 &&
                             cs[i].OriginalTermIndexes[2] == 5 &&
                             cs[i].Suggestion.String.equals("eightyeight"))
                            );
                    }

                    // Restricting to terms not already in the index prunes "eighty"/"yeight".
                    cs = wbsp.SuggestWordCombinations(terms, 5, ir, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
                    assertTrue(cs.Length == 2);
                    assertTrue(cs[0].OriginalTermIndexes.Length == 2);
                    assertTrue(cs[0].Suggestion.Score == 1);
                    assertTrue(cs[0].OriginalTermIndexes[0] == 1);
                    assertTrue(cs[0].OriginalTermIndexes[1] == 2);
                    assertTrue(cs[0].Suggestion.String.equals("hundred"));
                    assertTrue(cs[0].Suggestion.Score == 1);

                    assertTrue(cs[1].OriginalTermIndexes.Length == 3);
                    assertTrue(cs[1].Suggestion.Score == 2);
                    assertTrue(cs[1].OriginalTermIndexes[0] == 1);
                    assertTrue(cs[1].OriginalTermIndexes[1] == 2);
                    assertTrue(cs[1].OriginalTermIndexes[2] == 3);
                    assertTrue(cs[1].Suggestion.String.equals("hundredeight"));
                }
            }
            finally
            {
                // BUG FIX: guard against ir being null (DirectoryReader.Open may have thrown)
                // instead of relying on a swallowed NullReferenceException.
                if (ir != null)
                {
                    try { ir.Dispose(); } catch (Exception /*e1*/) { }
                }
            }
        }
        // ---- Example #27 ----
        /// <summary>
        /// Builds an index from generated documents, deletes a random (strict) subset of
        /// them by term, then checks that <c>DocumentValueSourceDictionary</c> enumerates
        /// exactly the surviving documents with weight = WEIGHT_1 + WEIGHT_2 and the
        /// expected payloads.
        /// </summary>
        public void TestWithDeletions()
        {
            Directory         directory = NewDirectory();
            IndexWriterConfig config    = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));

            config.SetMergePolicy(NewLogMergePolicy());
            RandomIndexWriter indexWriter              = new RandomIndexWriter(Random, directory, config);
            IDictionary <string, Document> documents   = GenerateIndexDocuments(AtLeast(100));
            Random        rng          = Random;
            List <string> deletedTerms = new List <string>();

            // Index every document, randomly marking some (but never all) for deletion.
            foreach (Document document in documents.Values)
            {
                if (rng.nextBoolean() && deletedTerms.Count < documents.Count - 1)
                {
                    deletedTerms.Add(document.Get(FIELD_NAME));
                }
                indexWriter.AddDocument(document);
            }
            indexWriter.Commit();

            // Delete the marked documents by their key term.
            foreach (string deletedTerm in deletedTerms)
            {
                indexWriter.DeleteDocuments(new Term(FIELD_NAME, deletedTerm));
            }
            indexWriter.Commit();
            indexWriter.Dispose();

            // Drop the deleted entries from the expectation map as well.
            foreach (string deletedTerm in deletedTerms)
            {
                var removed = documents[deletedTerm];
                documents.Remove(deletedTerm);
                assertTrue(null != removed);
            }

            IndexReader reader = DirectoryReader.Open(directory);

            assertTrue("NumDocs should be > 0 but was " + reader.NumDocs, reader.NumDocs > 0);
            assertEquals(reader.NumDocs, documents.Count);
            ValueSource[] weightSources = new ValueSource[] { new Int64FieldSource(WEIGHT_FIELD_NAME_1), new Int64FieldSource(WEIGHT_FIELD_NAME_2) };

            IDictionary      dictionary = new DocumentValueSourceDictionary(reader, FIELD_NAME, new SumSingleFunction(weightSources), PAYLOAD_FIELD_NAME);
            IInputEnumerator enumerator = dictionary.GetEntryEnumerator();

            while (enumerator.MoveNext())
            {
                string   term     = enumerator.Current.Utf8ToString();
                Document document = documents[term];
                documents.Remove(term);
                // Reported weight must equal the sum of the two stored weight fields.
                long weight1 = document.GetField(WEIGHT_FIELD_NAME_1).GetInt64ValueOrDefault();
                long weight2 = document.GetField(WEIGHT_FIELD_NAME_2).GetInt64ValueOrDefault();
                assertTrue(enumerator.Current.equals(new BytesRef(document.Get(FIELD_NAME))));
                assertEquals(enumerator.Weight, weight2 + weight1);
                assertTrue(enumerator.Payload.equals(document.GetField(PAYLOAD_FIELD_NAME).GetBinaryValue()));
            }
            // Every surviving document must have been visited exactly once.
            assertTrue(documents.Count == 0);
            reader.Dispose();
            directory.Dispose();
        }
        /// <summary>
        /// One-time fixture setup: builds one full index plus two disjoint "shard" indexes
        /// holding the same ten documents split by parity, then opens a plain searcher, a
        /// shard-union searcher, and a searcher with deliberately duplicated documents.
        /// </summary>
        public override void BeforeClass()
        {
            base.BeforeClass();

            Dir   = NewDirectory();
            Sdir1 = NewDirectory();
            Sdir2 = NewDirectory();
            RandomIndexWriter mainWriter   = new RandomIndexWriter(Random(), Dir, new MockAnalyzer(Random()), Similarity, TimeZone);
            RandomIndexWriter shardWriter1 = new RandomIndexWriter(Random(), Sdir1, new MockAnalyzer(Random()), Similarity, TimeZone);
            RandomIndexWriter shardWriter2 = new RandomIndexWriter(Random(), Sdir2, new MockAnalyzer(Random()), Similarity, TimeZone);

            // Ten docs: all go into the main index; evens into shard 1, odds into shard 2.
            for (int i = 0; i < 10; i++)
            {
                Document doc = new Document();
                doc.Add(NewStringField("data", Convert.ToString(i), Field.Store.NO));
                mainWriter.AddDocument(doc);
                if (i % 2 == 0)
                {
                    shardWriter1.AddDocument(doc);
                }
                else
                {
                    shardWriter2.AddDocument(doc);
                }
            }
            mainWriter.ForceMerge(1);
            shardWriter1.ForceMerge(1);
            shardWriter2.ForceMerge(1);
            mainWriter.Dispose();
            shardWriter1.Dispose();
            shardWriter2.Dispose();

            Reader   = DirectoryReader.Open(Dir);
            Searcher = NewSearcher(Reader);

            // Union of the two disjoint shards — same logical content as the main index.
            MultiReader   = new MultiReader(new IndexReader[] { DirectoryReader.Open(Sdir1), DirectoryReader.Open(Sdir2) }, true);
            MultiSearcher = NewSearcher(MultiReader);

            // Shard 1 plus the full index — deliberately contains duplicate documents.
            MultiReaderDupls   = new MultiReader(new IndexReader[] { DirectoryReader.Open(Sdir1), DirectoryReader.Open(Dir) }, true);
            MultiSearcherDupls = NewSearcher(MultiReaderDupls);
        }
        // ---- Example #29 ----
        /// <summary>
        /// Indexes 100-400 line-file docs through an <c>NRTCachingDirectory</c>, periodically
        /// opening/reopening an NRT reader and running a search, then verifies that closing
        /// the writer flushes the cache (no cached files remain) and that every doc id is
        /// present in the committed index.
        /// </summary>
        public virtual void TestNRTAndCommit()
        {
            Directory           dir       = NewDirectory();
            NRTCachingDirectory cachedDir = new NRTCachingDirectory(dir, 2.0, 25.0);
            MockAnalyzer        analyzer  = new MockAnalyzer(Random());

            analyzer.MaxTokenLength = TestUtil.NextInt(Random(), 1, IndexWriter.MAX_TERM_LENGTH);
            IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
            RandomIndexWriter w    = new RandomIndexWriter(Random(), cachedDir, conf);
            LineFileDocs      docs = new LineFileDocs(Random(), DefaultCodecSupportsDocValues());
            int numDocs            = TestUtil.NextInt(Random(), 100, 400);

            if (VERBOSE)
            {
                Console.WriteLine("TEST: numDocs=" + numDocs);
            }

            IList <BytesRef> ids = new List <BytesRef>();
            DirectoryReader  r   = null;

            for (int docCount = 0; docCount < numDocs; docCount++)
            {
                Document doc = docs.NextDoc();
                ids.Add(new BytesRef(doc.Get("docid")));
                w.AddDocument(doc);
                // Roughly 1-in-20 docs: open (or refresh) an NRT reader and sanity-check it.
                if (Random().Next(20) == 17)
                {
                    if (r == null)
                    {
                        r = DirectoryReader.Open(w.w, false);
                    }
                    else
                    {
                        DirectoryReader r2 = DirectoryReader.OpenIfChanged(r);
                        if (r2 != null)
                        {
                            r.Dispose();
                            r = r2;
                        }
                    }
                    // The NRT reader must see every doc added so far.
                    Assert.AreEqual(1 + docCount, r.NumDocs);
                    IndexSearcher s = NewSearcher(r);
                    // Just make sure search can run; we can't assert
                    // totHits since it could be 0 (result intentionally discarded).
                    s.Search(new TermQuery(new Term("body", "the")), 10);
                }
            }

            if (r != null)
            {
                r.Dispose();
            }

            // Close should force cache to clear since all files are sync'd
            w.Dispose();

            string[] cachedFiles = cachedDir.ListCachedFiles();
            foreach (string file in cachedFiles)
            {
                // Print the offending files before the assert below fails.
                Console.WriteLine("FAIL: cached file " + file + " remains after sync");
            }
            Assert.AreEqual(0, cachedFiles.Length);

            // Re-open from the underlying directory: every indexed id must be committed.
            r = DirectoryReader.Open(dir);
            foreach (BytesRef id in ids)
            {
                Assert.AreEqual(1, r.DocFreq(new Term("docid", id)));
            }
            r.Dispose();
            cachedDir.Dispose();
            docs.Dispose();
        }
 // ---- Example #30 ----
 /// <summary>
 /// Lazily opens the directory's reader on first use; later calls return the
 /// cached instance. The whole check-and-create is serialized on _syncLock.
 /// </summary>
 private IndexReader CreateIndexReader()
 {
     lock (_syncLock) {
         if (_indexReader == null)
         {
             _indexReader = DirectoryReader.Open(_directory);
         }
         return _indexReader;
     }
 }