示例#1
0
        /// <summary>
        /// Flattens a boosted <c>FilteredQuery</c> wrapping <c>pqF("A")</c> and expects the
        /// flattened set to match <c>tq(boost, "A")</c>.
        /// </summary>
        public void TestFlattenFilteredQuery()
        {
            initBoost();

            // The boost is applied before the FieldQuery is constructed, as in the original flow.
            Query filtered = new FilteredQuery(pqF("A"), new TestFlattenFilteredQueryFilterAnonymousHelper())
            {
                Boost = boost
            };
            FieldQuery fieldQuery = new FieldQuery(filtered, true, true);
            ISet<Query> flattened = new JCG.HashSet<Query>();

            fieldQuery.Flatten(filtered, reader, flattened);

            assertCollectionQueries(flattened, tq(boost, "A"));
        }
示例#2
0
        /// <summary>
        /// Flattens a boosted <c>ConstantScoreQuery</c> wrapping <c>pqF("A")</c> and expects the
        /// flattened set to match <c>tq(boost, "A")</c>.
        /// </summary>
        public void TestFlattenConstantScoreQuery()
        {
            initBoost();

            // The boost is applied before the FieldQuery is constructed, as in the original flow.
            Query constantScore = new ConstantScoreQuery(pqF("A"))
            {
                Boost = boost
            };
            FieldQuery fieldQuery = new FieldQuery(constantScore, true, true);
            ISet<Query> flattened = new JCG.HashSet<Query>();

            fieldQuery.Flatten(constantScore, reader, flattened);

            assertCollectionQueries(flattened, tq(boost, "A"));
        }
示例#3
0
        /// <summary>
        /// Builds a term stack for <c>pqF("c", "d")</c> over the index created by
        /// <c>makeIndex()</c> and checks the three terms popped from it, in order.
        /// </summary>
        public void Test1Phrase()
        {
            makeIndex();

            var fieldQuery = new FieldQuery(pqF("c", "d"), true, true);
            var termStack = new FieldTermStack(reader, 0, F, fieldQuery);

            // Expected terms in pop order.
            string[] expectedTerms = { "c(10,11,5)", "c(18,19,9)", "d(20,21,10)" };

            assertEquals(expectedTerms.Length, termStack.termList.size());
            foreach (string expectedTerm in expectedTerms)
            {
                assertEquals(expectedTerm, termStack.Pop().toString());
            }
        }
示例#4
0
        /// <summary>
        /// Shared assertions for a multi-term query: the field term map must contain "defg",
        /// must not contain "dog", and a phrase search over a single "defg" term must succeed.
        /// </summary>
        /// <param name="query">The query wrapped in the <c>FieldQuery</c> under test.</param>
        private void defgMultiTermQueryTest(Query query)
        {
            var fieldQuery = new FieldQuery(query, reader, true, true);

            assertNotNull(fieldQuery.GetFieldTermMap(F, "defg"));
            assertNull(fieldQuery.GetFieldTermMap(F, "dog"));

            var candidates = new List<TermInfo>
            {
                new TermInfo("defg", 0, 12, 0, 1)
            };
            assertNotNull(fieldQuery.SearchPhrase(F, candidates));
        }
        /// <summary>
        /// Indexes one long document containing the words "highlight", "words" and "together"
        /// both scattered and as a contiguous phrase, then checks that the best fragment for a
        /// boosted phrase query is the one around the contiguous phrase.
        /// </summary>
        public void TestBoostedPhraseHighlightTest()
        {
            const string junkLine = "junk junk junk junk junk junk junk junk junk junk junk junk junk junk junk junk junk junk junk junk ";

            Directory directory = NewDirectory();
            IndexWriter indexWriter = new IndexWriter(directory, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)));
            Document document = new Document();

            FieldType vectorType = new FieldType(TextField.TYPE_STORED)
            {
                StoreTermVectorOffsets = true,
                StoreTermVectorPositions = true,
                StoreTermVectors = true
            };
            vectorType.Freeze();

            // Scattered words first, then ten junk lines, the contiguous phrase, and ten more junk lines.
            StringBuilder content = new StringBuilder();
            content.append("words words junk junk junk junk junk junk junk junk highlight junk junk junk junk together junk ");
            for (int line = 0; line < 10; line++)
            {
                content.append(junkLine);
            }
            content.append("highlight words together ");
            for (int line = 0; line < 10; line++)
            {
                content.append(junkLine);
            }

            document.Add(new Field("text", content.toString().Trim(), vectorType));
            indexWriter.AddDocument(document);

            FastVectorHighlighter fvh = new FastVectorHighlighter();
            IndexReader indexReader = DirectoryReader.Open(indexWriter, true);

            // This mimics what some query parsers do to <highlight words together>
            // NOTE(review): this query is built but never added to the combined query below — confirm whether that is intended.
            BooleanQuery terms = new BooleanQuery();
            terms.Add(clause("text", "highlight"), Occur.MUST);
            terms.Add(clause("text", "words"), Occur.MUST);
            terms.Add(clause("text", "together"), Occur.MUST);

            // This mimics what some query parsers do to <"highlight words together">
            BooleanQuery phrase = new BooleanQuery();
            phrase.Add(clause("text", "highlight", "words", "together"), Occur.MUST);
            phrase.Boost = 100;

            // Now combine those results in a boolean query which should pull the phrases to the front of the list of fragments
            BooleanQuery combined = new BooleanQuery();
            combined.Add(phrase, Occur.MUST);
            combined.Add(phrase, Occur.SHOULD);

            FieldQuery fieldQuery = new FieldQuery(combined, indexReader, true, false);
            String bestFragment = fvh.GetBestFragment(fieldQuery, indexReader, 0, "text", 100);

            assertEquals("junk junk junk junk junk junk junk junk <b>highlight words together</b> junk junk junk junk junk junk junk junk", bestFragment);

            indexReader.Dispose();
            indexWriter.Dispose();
            directory.Dispose();
        }
示例#6
0
        /// <summary>
        /// Flattens a boosted query built by <c>dmq(tq("A"), tq("B"), pqF("C", "D"))</c> and
        /// expects each of the three sub-queries to appear in the flattened set with the boost applied.
        /// </summary>
        public void TestFlattenDisjunctionMaxQuery()
        {
            initBoost();

            Query disjunction = dmq(tq("A"), tq("B"), pqF("C", "D"));
            disjunction.Boost = boost;

            FieldQuery fieldQuery = new FieldQuery(disjunction, true, true);
            ISet<Query> flattened = new JCG.HashSet<Query>();

            fieldQuery.Flatten(disjunction, reader, flattened);

            assertCollectionQueries(flattened, tq(boost, "A"), tq(boost, "B"), pqF(boost, "C", "D"));
        }
示例#7
0
        /// <summary>
        /// Builds a term stack for the wildcard query <c>th*e</c> over the index created by
        /// <c>makeIndexLongMV()</c> and checks the four terms popped from it, in order.
        /// </summary>
        public void TestWildcard()
        {
            makeIndexLongMV();

            var wildcard = new WildcardQuery(new Term(F, "th*e"));
            var fieldQuery = new FieldQuery(wildcard, reader, true, true);
            var termStack = new FieldTermStack(reader, 0, F, fieldQuery);

            // Expected terms in pop order.
            string[] expectedTerms =
            {
                "the(15,18,2)",
                "these(133,138,20)",
                "the(153,156,23)",
                "the(195,198,31)"
            };

            assertEquals(expectedTerms.Length, termStack.termList.size());
            foreach (string expectedTerm in expectedTerms)
            {
                assertEquals(expectedTerm, termStack.Pop().toString());
            }
        }
示例#8
0
        /// <summary>
        /// Searches the index created by <c>makeIndex2w1w()</c> for <c>tq("computer")</c> and
        /// checks the single resulting phrase: its string form and its start/end offsets.
        /// </summary>
        public void TestFieldPhraseListIndex2w1wSearch1partial()
        {
            makeIndex2w1w();

            FieldQuery      fq    = new FieldQuery(tq("computer"), true, true);
            FieldTermStack  stack = new FieldTermStack(reader, 0, F, fq);
            FieldPhraseList fpl   = new FieldPhraseList(stack, fq);

            assertEquals(1, fpl.PhraseList.size());
            // Format with the invariant culture so the expected "1.0" does not depend on the
            // current culture of the machine running the test.
            assertEquals("computer(1.0)((3,20))", fpl.PhraseList[0].ToString(System.Globalization.CultureInfo.InvariantCulture));
            assertEquals(3, fpl.PhraseList[0].StartOffset);
            assertEquals(20, fpl.PhraseList[0].EndOffset);
        }
示例#9
0
        /// <summary>
        /// Searches the index created by <c>makeIndex1w2w()</c> for <c>pqF("personal", "computer")</c>
        /// and checks the single resulting phrase: its string form and its start/end offsets.
        /// </summary>
        public void TestFieldPhraseListIndex1w2wSearch1phrase()
        {
            makeIndex1w2w();

            FieldQuery      fq    = new FieldQuery(pqF("personal", "computer"), true, true);
            FieldTermStack  stack = new FieldTermStack(reader, 0, F, fq);
            FieldPhraseList fpl   = new FieldPhraseList(stack, fq);

            assertEquals(1, fpl.PhraseList.size());
            // Format with the invariant culture so the expected "1.0" does not depend on the
            // current culture of the machine running the test.
            assertEquals("personalcomputer(1.0)((3,5))", fpl.PhraseList[0].ToString(System.Globalization.CultureInfo.InvariantCulture));
            assertEquals(3, fpl.PhraseList[0].StartOffset);
            assertEquals(5, fpl.PhraseList[0].EndOffset);
        }
        /// <summary>
        /// Indexes "c a a b c", searches it with <c>pqF(2F, 1, "a", "c")</c> and checks the single
        /// resulting phrase: its string form and its start/end offsets.
        /// </summary>
        public void TestPhraseSlop()
        {
            make1d1fIndex("c a a b c");

            var fieldQuery = new FieldQuery(pqF(2F, 1, "a", "c"), true, true);
            var termStack = new FieldTermStack(reader, 0, F, fieldQuery);
            var phrases = new FieldPhraseList(termStack, fieldQuery);

            assertEquals(1, phrases.PhraseList.size());

            var onlyPhrase = phrases.PhraseList[0];
            // LUCENENET specific: use invariant culture, since we are culture-aware
            assertEquals("ac(2.0)((4,5)(8,9))", onlyPhrase.ToString(CultureInfo.InvariantCulture));
            assertEquals(4, onlyPhrase.StartOffset);
            assertEquals(9, onlyPhrase.EndOffset);
        }
示例#11
0
        /// <summary>
        /// Builds a term stack for the 2-gram phrase "sp","pe","ee","ed" over the index created by
        /// <c>makeIndexLongMVB()</c> and checks the four terms popped from it, in order.
        /// </summary>
        public void Test1PhraseMVB()
        {
            makeIndexLongMVB();

            // "speed" -(2gram)-> "sp","pe","ee","ed"
            var fieldQuery = new FieldQuery(pqF("sp", "pe", "ee", "ed"), true, true);
            var termStack = new FieldTermStack(reader, 0, F, fieldQuery);

            // Expected terms in pop order.
            string[] expectedTerms =
            {
                "sp(88,90,61)",
                "pe(89,91,62)",
                "ee(90,92,63)",
                "ed(91,93,64)"
            };

            assertEquals(expectedTerms.Length, termStack.termList.size());
            foreach (string expectedTerm in expectedTerms)
            {
                assertEquals(expectedTerm, termStack.Pop().toString());
            }
        }
示例#12
0
        /// <summary>
        /// Builds a term stack for <c>pqF("search", "engines")</c> over the index created by
        /// <c>makeIndexLongMV()</c> and checks the four terms popped from it, in order.
        /// </summary>
        public void Test1PhraseLongMV()
        {
            makeIndexLongMV();

            var fieldQuery = new FieldQuery(pqF("search", "engines"), true, true);
            var termStack = new FieldTermStack(reader, 0, F, fieldQuery);

            // Expected terms in pop order.
            string[] expectedTerms =
            {
                "search(102,108,14)",
                "engines(109,116,15)",
                "search(157,163,24)",
                "engines(164,171,25)"
            };

            assertEquals(expectedTerms.Length, termStack.termList.size());
            foreach (string expectedTerm in expectedTerms)
            {
                assertEquals(expectedTerm, termStack.Pop().toString());
            }
        }
示例#13
0
        /// <summary>
        /// Builds a term stack for <c>pqF("ab", "bb")</c> over the index created by
        /// <c>makeIndexB()</c> and checks the four terms popped from it, in order.
        /// </summary>
        public void Test1PhraseB()
        {
            makeIndexB();

            var fieldQuery = new FieldQuery(pqF("ab", "bb"), true, true);
            var termStack = new FieldTermStack(reader, 0, F, fieldQuery);

            // Expected terms in pop order.
            string[] expectedTerms =
            {
                "ab(2,4,2)",
                "bb(3,5,3)",
                "ab(6,8,6)",
                "bb(7,9,7)"
            };

            assertEquals(expectedTerms.Length, termStack.termList.size());
            foreach (string expectedTerm in expectedTerms)
            {
                assertEquals(expectedTerm, termStack.Pop().toString());
            }
        }
示例#14
0
        /// <summary>
        /// Runs the 2-gram phrase "sp","pe","ee","ed" against the index created by
        /// <c>makeIndexLongMVB()</c>, builds a frag list via <c>SimpleFragListBuilder</c>
        /// (passing 100) and checks the single resulting frag info.
        /// </summary>
        public void Test1PhraseLongMVB()
        {
            makeIndexLongMVB();

            // "speed" -(2gram)-> "sp","pe","ee","ed"
            var fieldQuery = new FieldQuery(pqF("sp", "pe", "ee", "ed"), true, true);
            var termStack = new FieldTermStack(reader, 0, F, fieldQuery);
            var phrases = new FieldPhraseList(termStack, fieldQuery);
            FieldFragList fragList = new SimpleFragListBuilder().CreateFieldFragList(phrases, 100);

            assertEquals(1, fragList.FragInfos.size());
            // LUCENENET specific: use invariant culture, since we are culture-aware
            assertEquals("subInfos=(sppeeeed((88,93)))/1.0(41,141)", fragList.FragInfos[0].ToString(CultureInfo.InvariantCulture));
        }
示例#15
0
        /// <summary>
        /// Runs <c>pqF("search", "engines")</c> against the index created by
        /// <c>makeIndexLongMV()</c>, builds a frag list via <c>SimpleFragListBuilder</c>
        /// (passing 100) and checks the single resulting frag info.
        /// </summary>
        public void Test1PhraseLongMV()
        {
            makeIndexLongMV();

            var fieldQuery = new FieldQuery(pqF("search", "engines"), true, true);
            var termStack = new FieldTermStack(reader, 0, F, fieldQuery);
            var phrases = new FieldPhraseList(termStack, fieldQuery);
            FieldFragList fragList = new SimpleFragListBuilder().CreateFieldFragList(phrases, 100);

            assertEquals(1, fragList.FragInfos.size());
            // LUCENENET specific: use invariant culture, since we are culture-aware
            assertEquals("subInfos=(searchengines((102,116))searchengines((157,171)))/2.0(87,187)", fragList.FragInfos[0].ToString(CultureInfo.InvariantCulture));
        }
示例#16
0
        /// <summary>
        /// Searches the index created by <c>makeIndex2w1w()</c> for <c>tq("computer")</c> and
        /// checks the single resulting phrase: its string form and its start/end offsets.
        /// </summary>
        public void TestFieldPhraseListIndex2w1wSearch1partial()
        {
            makeIndex2w1w();

            var fieldQuery = new FieldQuery(tq("computer"), true, true);
            var termStack = new FieldTermStack(reader, 0, F, fieldQuery);
            var phrases = new FieldPhraseList(termStack, fieldQuery);

            assertEquals(1, phrases.PhraseList.size());

            var onlyPhrase = phrases.PhraseList[0];
            // LUCENENET specific: use invariant culture, since we are culture-aware
            assertEquals("computer(1.0)((3,20))", onlyPhrase.ToString(CultureInfo.InvariantCulture));
            assertEquals(3, onlyPhrase.StartOffset);
            assertEquals(20, onlyPhrase.EndOffset);
        }
示例#17
0
        /// <summary>
        /// Expands two phrase queries on different fields using a <c>FieldQuery</c> created with
        /// field matching disabled, and expects the original two plus the combined
        /// <c>pq(F1, "a", "b", "c")</c>.
        /// </summary>
        public void TestExpandNotFieldMatch()
        {
            Query placeholder = pqF("DUMMY");
            FieldQuery fieldQuery = new FieldQuery(placeholder, true, false);

            // f1:"a b",f2:"b c" => f1:"a b",f2:"b c",f1:"a b c"
            ISet<Query> input = new JCG.HashSet<Query>();
            input.Add(pq(F1, "a", "b"));
            input.Add(pq(F2, "b", "c"));

            assertCollectionQueries(
                fieldQuery.Expand(input),
                pq(F1, "a", "b"),
                pq(F2, "b", "c"),
                pq(F1, "a", "b", "c"));
        }
示例#18
0
        /// <summary>
        /// Runs <c>tq("d")</c> against the index created by <c>makeIndexShortMV()</c>, builds a
        /// frag list via <c>SimpleFragListBuilder</c> (passing 100) and checks the single
        /// resulting frag info.
        /// </summary>
        public void Test1PhraseShortMV()
        {
            makeIndexShortMV();

            var fieldQuery = new FieldQuery(tq("d"), true, true);
            var termStack = new FieldTermStack(reader, 0, F, fieldQuery);
            var phrases = new FieldPhraseList(termStack, fieldQuery);
            FieldFragList fragList = new SimpleFragListBuilder().CreateFieldFragList(phrases, 100);

            assertEquals(1, fragList.FragInfos.size());
            // LUCENENET specific: use invariant culture, since we are culture-aware
            assertEquals("subInfos=(d((9,10)))/1.0(0,100)", fragList.FragInfos[0].ToString(CultureInfo.InvariantCulture));
        }
示例#19
0
        /// <summary>
        /// Runs <c>tq("aaa")</c> against the index created by <c>makeUnstoredIndex()</c> and
        /// expects <c>SimpleFragmentsBuilder.CreateFragment</c> to return null.
        /// </summary>
        public void TestUnstoredField()
        {
            makeUnstoredIndex();

            var fieldQuery = new FieldQuery(tq("aaa"), true, true);
            var termStack = new FieldTermStack(reader, 0, F, fieldQuery);
            var phrases = new FieldPhraseList(termStack, fieldQuery);
            FieldFragList fragList = new SimpleFragListBuilder().CreateFieldFragList(phrases, 100);
            var fragmentsBuilder = new SimpleFragmentsBuilder();

            assertNull(fragmentsBuilder.CreateFragment(reader, 0, F, fragList));
        }
示例#20
0
        /// <summary>
        /// Indexes "c a a b c", searches it with <c>pqF(2F, 1, "a", "c")</c> and checks the single
        /// resulting phrase: its string form and its start/end offsets.
        /// </summary>
        public void TestPhraseSlop()
        {
            make1d1fIndex("c a a b c");

            FieldQuery      fq    = new FieldQuery(pqF(2F, 1, "a", "c"), true, true);
            FieldTermStack  stack = new FieldTermStack(reader, 0, F, fq);
            FieldPhraseList fpl   = new FieldPhraseList(stack, fq);

            assertEquals(1, fpl.PhraseList.size());
            // Format with the invariant culture so the expected "2.0" does not depend on the
            // current culture of the machine running the test.
            assertEquals("ac(2.0)((4,5)(8,9))", fpl.PhraseList[0].ToString(System.Globalization.CultureInfo.InvariantCulture));
            assertEquals(4, fpl.PhraseList[0].StartOffset);
            assertEquals(9, fpl.PhraseList[0].EndOffset);
        }
示例#21
0
        /// <summary>
        /// Runs the 2-gram phrase "sp","pe","ee","ed" against the index created by
        /// <c>makeIndexLongMVB()</c> and checks the fragment text produced by
        /// <c>SimpleFragmentsBuilder.CreateFragment</c>.
        /// </summary>
        public void Test1PhraseLongMVB()
        {
            makeIndexLongMVB();

            // "speed" -(2gram)-> "sp","pe","ee","ed"
            var fieldQuery = new FieldQuery(pqF("sp", "pe", "ee", "ed"), true, true);
            var termStack = new FieldTermStack(reader, 0, F, fieldQuery);
            var phrases = new FieldPhraseList(termStack, fieldQuery);
            FieldFragList fragList = new SimpleFragListBuilder().CreateFieldFragList(phrases, 100);
            var fragmentsBuilder = new SimpleFragmentsBuilder();

            assertEquals(
                "additional hardware. \nWhen you talk about processing <b>speed</b>, the",
                fragmentsBuilder.CreateFragment(reader, 0, F, fragList));
        }
        /// <summary>
        /// Runs the 2-gram phrase "sp","pe","ee","ed" against the index created by
        /// <c>makeIndexLongMVB()</c>, builds a frag list via <c>SimpleFragListBuilder</c>
        /// (passing 100) and checks the single resulting frag info.
        /// </summary>
        public void Test1PhraseLongMVB()
        {
            makeIndexLongMVB();

            FieldQuery            fq    = new FieldQuery(pqF("sp", "pe", "ee", "ed"), true, true); // "speed" -(2gram)-> "sp","pe","ee","ed"
            FieldTermStack        stack = new FieldTermStack(reader, 0, F, fq);
            FieldPhraseList       fpl   = new FieldPhraseList(stack, fq);
            SimpleFragListBuilder sflb  = new SimpleFragListBuilder();
            FieldFragList         ffl   = sflb.CreateFieldFragList(fpl, 100);

            assertEquals(1, ffl.FragInfos.size());
            // Format with the invariant culture so the expected "1.0" does not depend on the
            // current culture of the machine running the test.
            assertEquals("subInfos=(sppeeeed((88,93)))/1.0(41,141)", ffl.FragInfos[0].ToString(System.Globalization.CultureInfo.InvariantCulture));
        }
        /// <summary>
        /// Runs <c>pqF("search", "engines")</c> against the index created by
        /// <c>makeIndexLongMV()</c>, builds a frag list via <c>SimpleFragListBuilder</c>
        /// (passing 100) and checks the single resulting frag info.
        /// </summary>
        public void Test1PhraseLongMV()
        {
            makeIndexLongMV();

            FieldQuery            fq    = new FieldQuery(pqF("search", "engines"), true, true);
            FieldTermStack        stack = new FieldTermStack(reader, 0, F, fq);
            FieldPhraseList       fpl   = new FieldPhraseList(stack, fq);
            SimpleFragListBuilder sflb  = new SimpleFragListBuilder();
            FieldFragList         ffl   = sflb.CreateFieldFragList(fpl, 100);

            assertEquals(1, ffl.FragInfos.size());
            // Format with the invariant culture so the expected "2.0" does not depend on the
            // current culture of the machine running the test.
            assertEquals("subInfos=(searchengines((102,116))searchengines((157,171)))/2.0(87,187)", ffl.FragInfos[0].ToString(System.Globalization.CultureInfo.InvariantCulture));
        }
        /// <summary>
        /// Runs <c>tq("d")</c> against the index created by <c>makeIndexShortMV()</c>, builds a
        /// frag list via <c>SimpleFragListBuilder</c> (passing 100) and checks the single
        /// resulting frag info.
        /// </summary>
        public void Test1PhraseShortMV()
        {
            makeIndexShortMV();

            FieldQuery            fq    = new FieldQuery(tq("d"), true, true);
            FieldTermStack        stack = new FieldTermStack(reader, 0, F, fq);
            FieldPhraseList       fpl   = new FieldPhraseList(stack, fq);
            SimpleFragListBuilder sflb  = new SimpleFragListBuilder();
            FieldFragList         ffl   = sflb.CreateFieldFragList(fpl, 100);

            assertEquals(1, ffl.FragInfos.size());
            // Format with the invariant culture so the expected "1.0" does not depend on the
            // current culture of the machine running the test.
            assertEquals("subInfos=(d((9,10)))/1.0(0,100)", ffl.FragInfos[0].ToString(System.Globalization.CultureInfo.InvariantCulture));
        }
        /// <summary>
        /// Indexes eight short documents, searches them with a <c>CommonTermsQuery</c> over the
        /// terms "text", "long" and "very", expects two hits, and checks the best highlighted
        /// fragment for each of the two hits.
        /// </summary>
        public void TestCommonTermsQueryHighlight()
        {
            Directory directory = NewDirectory();
            IndexWriter indexWriter = new IndexWriter(directory, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random, MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET)));

            FieldType vectorType = new FieldType(TextField.TYPE_STORED)
            {
                StoreTermVectorOffsets = true,
                StoreTermVectorPositions = true,
                StoreTermVectors = true
            };
            vectorType.Freeze();

            String[] contents =
            {
                "Hello this is a piece of text that is very long and contains too much preamble and the meat is really here which says kennedy has been shot",
                "This piece of text refers to Kennedy at the beginning then has a longer piece of text that is very long in the middle and finally ends with another reference to Kennedy",
                "JFK has been shot",
                "John Kennedy has been shot",
                "This text has a typo in referring to Keneddy",
                "wordx wordy wordz wordx wordy wordx worda wordb wordy wordc",
                "y z x y z a b",
                "lets is a the lets is a the lets is a the lets"
            };

            // One document per entry, added in array order.
            foreach (String content in contents)
            {
                Document document = new Document();
                document.Add(new Field("field", content, vectorType));
                indexWriter.AddDocument(document);
            }

            CommonTermsQuery commonTerms = new CommonTermsQuery(Occur.MUST, Occur.SHOULD, 2);
            commonTerms.Add(new Term("field", "text"));
            commonTerms.Add(new Term("field", "long"));
            commonTerms.Add(new Term("field", "very"));

            FastVectorHighlighter fvh = new FastVectorHighlighter();
            IndexReader indexReader = DirectoryReader.Open(indexWriter, true);
            IndexSearcher indexSearcher = NewSearcher(indexReader);
            TopDocs topDocs = indexSearcher.Search(commonTerms, 10);

            assertEquals(2, topDocs.TotalHits);

            // First hit.
            FieldQuery fieldQuery = fvh.GetFieldQuery(commonTerms, indexReader);
            String[] fragments = fvh.GetBestFragments(fieldQuery, indexReader, topDocs.ScoreDocs[0].Doc, "field", 1000, 1);
            assertEquals("This piece of <b>text</b> refers to Kennedy at the beginning then has a longer piece of <b>text</b> that is <b>very</b> <b>long</b> in the middle and finally ends with another reference to Kennedy", fragments[0]);

            // Second hit, using a freshly obtained field query.
            fieldQuery = fvh.GetFieldQuery(commonTerms, indexReader);
            fragments = fvh.GetBestFragments(fieldQuery, indexReader, topDocs.ScoreDocs[1].Doc, "field", 1000, 1);
            assertEquals("Hello this is a piece of <b>text</b> that is <b>very</b> <b>long</b> and contains too much preamble and the meat is really here which says kennedy has been shot", fragments[0]);

            indexReader.Dispose();
            indexWriter.Dispose();
            directory.Dispose();
        }
示例#26
0
        /// <summary>
        /// Runs <c>tq("d")</c> against the index created by <c>makeIndexShortMV()</c> with
        /// <c>MultiValuedSeparator</c> set to '/' and checks the resulting fragment text.
        /// </summary>
        public void TestMVSeparator()
        {
            makeIndexShortMV();

            var fieldQuery = new FieldQuery(tq("d"), true, true);
            var termStack = new FieldTermStack(reader, 0, F, fieldQuery);
            var phrases = new FieldPhraseList(termStack, fieldQuery);
            FieldFragList fragList = new SimpleFragListBuilder().CreateFieldFragList(phrases, 100);

            var fragmentsBuilder = new SimpleFragmentsBuilder
            {
                MultiValuedSeparator = '/'
            };

            assertEquals("//a b c//<b>d</b> e", fragmentsBuilder.CreateFragment(reader, 0, F, fragList));
        }
示例#27
0
        /// <summary>
        /// Runs <c>pqF("search", "engines")</c> against the index created by
        /// <c>makeIndexLongMV()</c> and checks the fragment text produced by
        /// <c>SimpleFragmentsBuilder.CreateFragment</c>.
        /// </summary>
        public void Test1PhraseLongMV()
        {
            makeIndexLongMV();

            var fieldQuery = new FieldQuery(pqF("search", "engines"), true, true);
            var termStack = new FieldTermStack(reader, 0, F, fieldQuery);
            var phrases = new FieldPhraseList(termStack, fieldQuery);
            FieldFragList fragList = new SimpleFragListBuilder().CreateFieldFragList(phrases, 100);
            var fragmentsBuilder = new SimpleFragmentsBuilder();

            assertEquals(
                "customization: The most <b>search engines</b> use only one of these methods. Even the <b>search engines</b> that says they can",
                fragmentsBuilder.CreateFragment(reader, 0, F, fragList));
        }
示例#28
0
        /// <summary>
        /// Runs <c>tq("d")</c> against the index created by <c>makeIndexShortMV()</c> and checks
        /// the fragment text produced by <c>SimpleFragmentsBuilder.CreateFragment</c>.
        /// </summary>
        public void Test1PhraseShortMV()
        {
            makeIndexShortMV();

            var fieldQuery = new FieldQuery(tq("d"), true, true);
            var termStack = new FieldTermStack(reader, 0, F, fieldQuery);
            var phrases = new FieldPhraseList(termStack, fieldQuery);
            FieldFragList fragList = new SimpleFragListBuilder().CreateFieldFragList(phrases, 100);
            var fragmentsBuilder = new SimpleFragmentsBuilder();

            // Should we probably be trimming?
            assertEquals("  a b c  <b>d</b> e", fragmentsBuilder.CreateFragment(reader, 0, F, fragList));
        }
示例#29
0
        /// <summary>
        /// Indexes "bbbacbabc" via <c>make1d1fIndexB</c>, searches it with <c>pqF("ba", "ac")</c>
        /// and checks the string form of the single resulting phrase.
        /// </summary>
        public void Test1PhraseIndexB()
        {
            // 01 12 23 34 45 56 67 78 (offsets)
            // bb|bb|ba|ac|cb|ba|ab|bc
            //  0  1  2  3  4  5  6  7 (positions)
            make1d1fIndexB("bbbacbabc");

            FieldQuery      fq    = new FieldQuery(pqF("ba", "ac"), true, true);
            FieldTermStack  stack = new FieldTermStack(reader, 0, F, fq);
            FieldPhraseList fpl   = new FieldPhraseList(stack, fq);

            assertEquals(1, fpl.PhraseList.size());
            // Format with the invariant culture so the expected "1.0" does not depend on the
            // current culture of the machine running the test.
            assertEquals("baac(1.0)((2,5))", fpl.PhraseList[0].ToString(System.Globalization.CultureInfo.InvariantCulture));
        }
        /// <summary>
        /// Indexes "bbbacbabc" via <c>make1d1fIndexB</c>, searches it with <c>pqF("ba", "ac")</c>
        /// and checks the string form of the single resulting phrase.
        /// </summary>
        public void Test1PhraseIndexB()
        {
            // 01 12 23 34 45 56 67 78 (offsets)
            // bb|bb|ba|ac|cb|ba|ab|bc
            //  0  1  2  3  4  5  6  7 (positions)
            make1d1fIndexB("bbbacbabc");

            var fieldQuery = new FieldQuery(pqF("ba", "ac"), true, true);
            var termStack = new FieldTermStack(reader, 0, F, fieldQuery);
            var phrases = new FieldPhraseList(termStack, fieldQuery);

            assertEquals(1, phrases.PhraseList.size());
            // LUCENENET specific: use invariant culture, since we are culture-aware
            assertEquals("baac(1.0)((2,5))", phrases.PhraseList[0].ToString(CultureInfo.InvariantCulture));
        }