Exemplo n.º 1
0
        // Runs a SlowFuzzyQuery for "lucene" rewritten with
        // MultiTermQuery.TopTermsBoostOnlyBooleanQueryRewrite(50) against three documents and
        // asserts that all three match, with both "Lucene" hits ranked ahead of "Lucenne".
        public void TestBoostOnlyRewrite()
        {
            // Listed in the order the hits are expected to come back; the same order is used for indexing.
            string[] rankedValues = { "Lucene", "Lucene", "Lucenne" };

            Directory dir = NewDirectory();
            RandomIndexWriter indexWriter = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
            foreach (string value in rankedValues)
            {
                addDoc(value, indexWriter);
            }

            // Grab the reader before the writer is disposed.
            IndexReader indexReader = indexWriter.Reader;
            IndexSearcher indexSearcher = NewSearcher(indexReader);
            indexWriter.Dispose();

            SlowFuzzyQuery fuzzy = new SlowFuzzyQuery(new Term("field", "lucene"));
            fuzzy.SetRewriteMethod(new MultiTermQuery.TopTermsBoostOnlyBooleanQueryRewrite(50));

            ScoreDoc[] hits = indexSearcher.Search(fuzzy, null, 1000).ScoreDocs;
            assertEquals(3, hits.Length);

            // normally, 'Lucenne' would be the first result as IDF will skew the score.
            for (int rank = 0; rank < rankedValues.Length; rank++)
            {
                assertEquals(rankedValues[rank], indexReader.Document(hits[rank].Doc).Get("field"));
            }

            indexReader.Dispose();
            dir.Dispose();
        }
Exemplo n.º 2
0
        // Replays the expectations stored in the embedded resource
        // "Lucene.Net.Sandbox.Queries.fuzzyTestData.txt" against an index built from codePointTable.
        //
        // Resource layout, as consumed below:
        //   line 1            : bits; 2^bits documents are indexed, document i holding MapInt(codePointTable, i)
        //   then, per query   : "termIndex,prefix,pqSize,minScore"
        //                       expected hit count
        //                       one "docId,score" line per expected hit
        //
        // codePointTable: table handed to MapInt to turn an integer into the indexed/query term text.
        public void assertFromTestData(int[] codePointTable)
        {
            if (VERBOSE)
            {
                // Join the values: concatenating the array itself would only print its type name.
                Console.WriteLine("TEST: codePointTable=" + string.Join(",", codePointTable));
            }

            // The using blocks release the resource stream and its reader even when an assertion throws.
            using (Stream stream = GetType().Assembly.GetManifestResourceStream("Lucene.Net.Sandbox.Queries.fuzzyTestData.txt"))
            using (TextReader reader = new StreamReader(stream, Encoding.UTF8))
            {
                int bits  = int.Parse(reader.ReadLine(), CultureInfo.InvariantCulture);
                int terms = (int)Math.Pow(2, bits);

                Store.Directory   dir    = NewDirectory();
                RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random(), MockTokenizer.KEYWORD, false)).SetMergePolicy(NewLogMergePolicy()));

                // A single Document/Field pair is reused; only the field value changes per added document.
                Document doc   = new Document();
                Field    field = NewTextField("field", "", Field.Store.NO);

                doc.Add(field);

                for (int i = 0; i < terms; i++)
                {
                    field.StringValue = (MapInt(codePointTable, i));
                    writer.AddDocument(doc);
                }

                IndexReader   r        = writer.Reader;
                IndexSearcher searcher = NewSearcher(r);

                if (VERBOSE)
                {
                    Console.WriteLine("TEST: searcher=" + searcher);
                }
                // even though this uses a boost-only rewrite, this test relies upon queryNorm being the default implementation,
                // otherwise scores are different!
                searcher.Similarity = (new DefaultSimilarity());

                writer.Dispose();

                // Allocated once instead of on every Split call.
                char[] separators = new char[] { ',' };
                String line;

                while ((line = reader.ReadLine()) != null)
                {
                    String[] @params  = line.Split(separators, StringSplitOptions.RemoveEmptyEntries);
                    String   query    = MapInt(codePointTable, int.Parse(@params[0], CultureInfo.InvariantCulture));
                    int      prefix   = int.Parse(@params[1], CultureInfo.InvariantCulture);
                    int      pqSize   = int.Parse(@params[2], CultureInfo.InvariantCulture);
                    float    minScore = float.Parse(@params[3], CultureInfo.InvariantCulture);
#pragma warning disable 612, 618
                    SlowFuzzyQuery q = new SlowFuzzyQuery(new Term("field", query), minScore, prefix);
#pragma warning restore 612, 618
                    q.SetRewriteMethod(new MultiTermQuery.TopTermsBoostOnlyBooleanQueryRewrite(pqSize));
                    int     expectedResults = int.Parse(reader.ReadLine(), CultureInfo.InvariantCulture);
                    TopDocs docs            = searcher.Search(q, expectedResults);
                    assertEquals(expectedResults, docs.TotalHits);
                    for (int i = 0; i < expectedResults; i++)
                    {
                        String[] scoreDoc = reader.ReadLine().Split(separators, StringSplitOptions.RemoveEmptyEntries);
                        assertEquals(int.Parse(scoreDoc[0], CultureInfo.InvariantCulture), docs.ScoreDocs[i].Doc);
                        assertEquals(float.Parse(scoreDoc[1], CultureInfo.InvariantCulture), docs.ScoreDocs[i].Score, epsilon);
                    }
                }
                r.Dispose();
                dir.Dispose();
            }
        }
Exemplo n.º 3
0
        // Replays the expectations stored in the embedded resource
        // "Lucene.Net.Sandbox.Queries.fuzzyTestData.txt" against an index built from codePointTable.
        //
        // Resource layout, as consumed below:
        //   line 1            : bits; 2^bits documents are indexed, document i holding MapInt(codePointTable, i)
        //   then, per query   : "termIndex,prefix,pqSize,minScore"
        //                       expected hit count
        //                       one "docId,score" line per expected hit
        //
        // codePointTable: table handed to MapInt to turn an integer into the indexed/query term text.
        public void assertFromTestData(int[] codePointTable)
        {
            if (VERBOSE)
            {
                // Join the values: concatenating the array itself would only print its type name.
                Console.WriteLine("TEST: codePointTable=" + string.Join(",", codePointTable));
            }

            // The using blocks release the resource stream and its reader even when an assertion throws.
            using (Stream stream = GetType().Assembly.GetManifestResourceStream("Lucene.Net.Sandbox.Queries.fuzzyTestData.txt"))
            using (TextReader reader = new StreamReader(stream, Encoding.UTF8))
            {
                int bits = int.Parse(reader.ReadLine(), CultureInfo.InvariantCulture);
                int terms = (int)Math.Pow(2, bits);

                Store.Directory dir = NewDirectory();
                RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random(), MockTokenizer.KEYWORD, false)).SetMergePolicy(NewLogMergePolicy()));

                // A single Document/Field pair is reused; only the field value changes per added document.
                Document doc = new Document();
                Field field = NewTextField("field", "", Field.Store.NO);
                doc.Add(field);

                for (int i = 0; i < terms; i++)
                {
                    field.StringValue = (MapInt(codePointTable, i));
                    writer.AddDocument(doc);
                }

                IndexReader r = writer.Reader;
                IndexSearcher searcher = NewSearcher(r);
                if (VERBOSE)
                {
                    Console.WriteLine("TEST: searcher=" + searcher);
                }
                // even though this uses a boost-only rewrite, this test relies upon queryNorm being the default implementation,
                // otherwise scores are different!
                searcher.Similarity = (new DefaultSimilarity());

                writer.Dispose();

                // Allocated once instead of on every Split call.
                char[] separators = new char[] { ',' };
                String line;
                while ((line = reader.ReadLine()) != null)
                {
                    String[] @params = line.Split(separators, StringSplitOptions.RemoveEmptyEntries);
                    String query = MapInt(codePointTable, int.Parse(@params[0], CultureInfo.InvariantCulture));
                    int prefix = int.Parse(@params[1], CultureInfo.InvariantCulture);
                    int pqSize = int.Parse(@params[2], CultureInfo.InvariantCulture);
                    float minScore = float.Parse(@params[3], CultureInfo.InvariantCulture);
#pragma warning disable 612, 618
                    SlowFuzzyQuery q = new SlowFuzzyQuery(new Term("field", query), minScore, prefix);
#pragma warning restore 612, 618
                    q.SetRewriteMethod(new MultiTermQuery.TopTermsBoostOnlyBooleanQueryRewrite(pqSize));
                    int expectedResults = int.Parse(reader.ReadLine(), CultureInfo.InvariantCulture);
                    TopDocs docs = searcher.Search(q, expectedResults);
                    assertEquals(expectedResults, docs.TotalHits);
                    for (int i = 0; i < expectedResults; i++)
                    {
                        String[] scoreDoc = reader.ReadLine().Split(separators, StringSplitOptions.RemoveEmptyEntries);
                        assertEquals(int.Parse(scoreDoc[0], CultureInfo.InvariantCulture), docs.ScoreDocs[i].Doc);
                        assertEquals(float.Parse(scoreDoc[1], CultureInfo.InvariantCulture), docs.ScoreDocs[i].Score, epsilon);
                    }
                }
                r.Dispose();
                dir.Dispose();
            }
        }
Exemplo n.º 4
0
        // Indexes "Lucene", "Lucene" and "Lucenne", searches for "lucene" with a SlowFuzzyQuery that uses
        // MultiTermQuery.TopTermsBoostOnlyBooleanQueryRewrite(50), and asserts that three hits come back
        // with the two "Lucene" documents first and "Lucenne" last.
        public void TestBoostOnlyRewrite()
        {
            const string fieldName = "field";

            Directory indexDir = NewDirectory();
            RandomIndexWriter docWriter = new RandomIndexWriter(Random(), indexDir, Similarity, TimeZone);
            addDoc("Lucene", docWriter);
            addDoc("Lucene", docWriter);
            addDoc("Lucenne", docWriter);

            // The reader is taken from the writer before the writer is disposed.
            IndexReader docReader = docWriter.Reader;
            IndexSearcher docSearcher = NewSearcher(docReader);
            docWriter.Dispose();

            SlowFuzzyQuery fuzzyQuery = new SlowFuzzyQuery(new Term(fieldName, "lucene"));
            fuzzyQuery.SetRewriteMethod(new MultiTermQuery.TopTermsBoostOnlyBooleanQueryRewrite(50));

            ScoreDoc[] ranked = docSearcher.Search(fuzzyQuery, null, 1000).ScoreDocs;
            assertEquals(3, ranked.Length);

            // normally, 'Lucenne' would be the first result as IDF will skew the score.
            string[] expectedByRank = { "Lucene", "Lucene", "Lucenne" };
            for (int position = 0; position < expectedByRank.Length; position++)
            {
                string stored = docReader.Document(ranked[position].Doc).Get(fieldName);
                assertEquals(expectedByRank[position], stored);
            }

            docReader.Dispose();
            indexDir.Dispose();
        }