public void TestFlattenFilteredQuery()
{
    initBoost();
    Query query = new FilteredQuery(pqF("A"), new TestFlattenFilteredQueryFilterAnonymousHelper());
    query.Boost = boost;
    FieldQuery fq = new FieldQuery(query, true, true);
    ISet<Query> flatQueries = new JCG.HashSet<Query>();
    fq.Flatten(query, reader, flatQueries);
    assertCollectionQueries(flatQueries, tq(boost, "A"));
}
public void TestFlattenConstantScoreQuery()
{
    initBoost();
    Query query = new ConstantScoreQuery(pqF("A"));
    query.Boost = boost;
    FieldQuery fq = new FieldQuery(query, true, true);
    ISet<Query> flatQueries = new JCG.HashSet<Query>();
    fq.Flatten(query, reader, flatQueries);
    assertCollectionQueries(flatQueries, tq(boost, "A"));
}
public void Test1Phrase()
{
    makeIndex();
    FieldQuery fq = new FieldQuery(pqF("c", "d"), true, true);
    FieldTermStack stack = new FieldTermStack(reader, 0, F, fq);
    assertEquals(3, stack.termList.size());
    assertEquals("c(10,11,5)", stack.Pop().toString());
    assertEquals("c(18,19,9)", stack.Pop().toString());
    assertEquals("d(20,21,10)", stack.Pop().toString());
}
private void defgMultiTermQueryTest(Query query)
{
    FieldQuery fq = new FieldQuery(query, reader, true, true);
    QueryPhraseMap qpm = fq.GetFieldTermMap(F, "defg");
    assertNotNull(qpm);
    assertNull(fq.GetFieldTermMap(F, "dog"));
    List<TermInfo> phraseCandidate = new List<TermInfo>();
    phraseCandidate.Add(new TermInfo("defg", 0, 12, 0, 1));
    assertNotNull(fq.SearchPhrase(F, phraseCandidate));
}
public void TestBoostedPhraseHighlightTest()
{
    Directory dir = NewDirectory();
    IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)));
    Document doc = new Document();
    FieldType type = new FieldType(TextField.TYPE_STORED);
    type.StoreTermVectorOffsets = true;
    type.StoreTermVectorPositions = true;
    type.StoreTermVectors = true;
    type.Freeze();
    StringBuilder text = new StringBuilder();
    text.append("words words junk junk junk junk junk junk junk junk highlight junk junk junk junk together junk ");
    for (int i = 0; i < 10; i++)
    {
        text.append("junk junk junk junk junk junk junk junk junk junk junk junk junk junk junk junk junk junk junk junk ");
    }
    text.append("highlight words together ");
    for (int i = 0; i < 10; i++)
    {
        text.append("junk junk junk junk junk junk junk junk junk junk junk junk junk junk junk junk junk junk junk junk ");
    }
    doc.Add(new Field("text", text.toString().Trim(), type));
    writer.AddDocument(doc);
    FastVectorHighlighter highlighter = new FastVectorHighlighter();
    IndexReader reader = DirectoryReader.Open(writer, true);

    // This mimics what some query parsers do to <highlight words together>
    BooleanQuery terms = new BooleanQuery();
    terms.Add(clause("text", "highlight"), Occur.MUST);
    terms.Add(clause("text", "words"), Occur.MUST);
    terms.Add(clause("text", "together"), Occur.MUST);
    // This mimics what some query parsers do to <"highlight words together">
    BooleanQuery phrase = new BooleanQuery();
    phrase.Add(clause("text", "highlight", "words", "together"), Occur.MUST);
    phrase.Boost = 100;
    // Now combine the term and phrase queries in a boolean query, which should pull the phrases to the front of the list of fragments
    BooleanQuery query = new BooleanQuery();
    query.Add(terms, Occur.MUST);
    query.Add(phrase, Occur.SHOULD);
    FieldQuery fieldQuery = new FieldQuery(query, reader, true, false);
    String fragment = highlighter.GetBestFragment(fieldQuery, reader, 0, "text", 100);
    assertEquals("junk junk junk junk junk junk junk junk <b>highlight words together</b> junk junk junk junk junk junk junk junk", fragment);
    reader.Dispose();
    writer.Dispose();
    dir.Dispose();
}
public void TestFlattenDisjunctionMaxQuery()
{
    initBoost();
    Query query = dmq(tq("A"), tq("B"), pqF("C", "D"));
    query.Boost = boost;
    FieldQuery fq = new FieldQuery(query, true, true);
    ISet<Query> flatQueries = new JCG.HashSet<Query>();
    fq.Flatten(query, reader, flatQueries);
    assertCollectionQueries(flatQueries, tq(boost, "A"), tq(boost, "B"), pqF(boost, "C", "D"));
}
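// NOTE: The tests above and below rely on small query-factory helpers (tq, pq, pqF, dmq)
// that live on the shared test base class rather than in this file. The sketch below is a
// hypothetical illustration of what such helpers might look like; the real AbstractTestCase
// implementations (including boost/slop overloads such as pqF(2F, 1, "a", "c")) may differ.
protected TermQuery tq(string text)
{
    return tq(1f, text);
}

protected TermQuery tq(float boost, string text)
{
    // F is the default field name shared by these tests
    TermQuery q = new TermQuery(new Term(F, text));
    q.Boost = boost;
    return q;
}

protected PhraseQuery pqF(params string[] terms)
{
    // Phrase query on the default test field
    return pq(F, terms);
}

protected PhraseQuery pq(string field, params string[] terms)
{
    PhraseQuery q = new PhraseQuery();
    foreach (string t in terms)
    {
        q.Add(new Term(field, t));
    }
    return q;
}

protected DisjunctionMaxQuery dmq(params Query[] queries)
{
    // Tie-breaker multiplier of 0 is an assumption for this sketch
    DisjunctionMaxQuery q = new DisjunctionMaxQuery(0f);
    foreach (Query sub in queries)
    {
        q.Add(sub);
    }
    return q;
}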
public void TestWildcard()
{
    makeIndexLongMV();
    FieldQuery fq = new FieldQuery(new WildcardQuery(new Term(F, "th*e")), reader, true, true);
    FieldTermStack stack = new FieldTermStack(reader, 0, F, fq);
    assertEquals(4, stack.termList.size());
    assertEquals("the(15,18,2)", stack.Pop().toString());
    assertEquals("these(133,138,20)", stack.Pop().toString());
    assertEquals("the(153,156,23)", stack.Pop().toString());
    assertEquals("the(195,198,31)", stack.Pop().toString());
}
public void TestFieldPhraseListIndex1w2wSearch1phrase()
{
    makeIndex1w2w();
    FieldQuery fq = new FieldQuery(pqF("personal", "computer"), true, true);
    FieldTermStack stack = new FieldTermStack(reader, 0, F, fq);
    FieldPhraseList fpl = new FieldPhraseList(stack, fq);
    assertEquals(1, fpl.PhraseList.size());
    assertEquals("personalcomputer(1.0)((3,5))", fpl.PhraseList[0].toString());
    assertEquals(3, fpl.PhraseList[0].StartOffset);
    assertEquals(5, fpl.PhraseList[0].EndOffset);
}
public void TestPhraseSlop()
{
    make1d1fIndex("c a a b c");
    FieldQuery fq = new FieldQuery(pqF(2F, 1, "a", "c"), true, true);
    FieldTermStack stack = new FieldTermStack(reader, 0, F, fq);
    FieldPhraseList fpl = new FieldPhraseList(stack, fq);
    assertEquals(1, fpl.PhraseList.size());
    assertEquals("ac(2.0)((4,5)(8,9))", fpl.PhraseList[0].ToString(CultureInfo.InvariantCulture)); // LUCENENET specific: use invariant culture, since we are culture-aware
    assertEquals(4, fpl.PhraseList[0].StartOffset);
    assertEquals(9, fpl.PhraseList[0].EndOffset);
}
public void Test1PhraseMVB()
{
    makeIndexLongMVB();
    FieldQuery fq = new FieldQuery(pqF("sp", "pe", "ee", "ed"), true, true); // "speed" -(2gram)-> "sp","pe","ee","ed"
    FieldTermStack stack = new FieldTermStack(reader, 0, F, fq);
    assertEquals(4, stack.termList.size());
    assertEquals("sp(88,90,61)", stack.Pop().toString());
    assertEquals("pe(89,91,62)", stack.Pop().toString());
    assertEquals("ee(90,92,63)", stack.Pop().toString());
    assertEquals("ed(91,93,64)", stack.Pop().toString());
}
public void Test1PhraseLongMV()
{
    makeIndexLongMV();
    FieldQuery fq = new FieldQuery(pqF("search", "engines"), true, true);
    FieldTermStack stack = new FieldTermStack(reader, 0, F, fq);
    assertEquals(4, stack.termList.size());
    assertEquals("search(102,108,14)", stack.Pop().toString());
    assertEquals("engines(109,116,15)", stack.Pop().toString());
    assertEquals("search(157,163,24)", stack.Pop().toString());
    assertEquals("engines(164,171,25)", stack.Pop().toString());
}
public void Test1PhraseB()
{
    makeIndexB();
    FieldQuery fq = new FieldQuery(pqF("ab", "bb"), true, true);
    FieldTermStack stack = new FieldTermStack(reader, 0, F, fq);
    assertEquals(4, stack.termList.size());
    assertEquals("ab(2,4,2)", stack.Pop().toString());
    assertEquals("bb(3,5,3)", stack.Pop().toString());
    assertEquals("ab(6,8,6)", stack.Pop().toString());
    assertEquals("bb(7,9,7)", stack.Pop().toString());
}
public void Test1PhraseLongMVB()
{
    makeIndexLongMVB();
    FieldQuery fq = new FieldQuery(pqF("sp", "pe", "ee", "ed"), true, true); // "speed" -(2gram)-> "sp","pe","ee","ed"
    FieldTermStack stack = new FieldTermStack(reader, 0, F, fq);
    FieldPhraseList fpl = new FieldPhraseList(stack, fq);
    SimpleFragListBuilder sflb = new SimpleFragListBuilder();
    FieldFragList ffl = sflb.CreateFieldFragList(fpl, 100);
    assertEquals(1, ffl.FragInfos.size());
    assertEquals("subInfos=(sppeeeed((88,93)))/1.0(41,141)", ffl.FragInfos[0].ToString(CultureInfo.InvariantCulture)); // LUCENENET specific: use invariant culture, since we are culture-aware
}
public void Test1PhraseLongMV()
{
    makeIndexLongMV();
    FieldQuery fq = new FieldQuery(pqF("search", "engines"), true, true);
    FieldTermStack stack = new FieldTermStack(reader, 0, F, fq);
    FieldPhraseList fpl = new FieldPhraseList(stack, fq);
    SimpleFragListBuilder sflb = new SimpleFragListBuilder();
    FieldFragList ffl = sflb.CreateFieldFragList(fpl, 100);
    assertEquals(1, ffl.FragInfos.size());
    assertEquals("subInfos=(searchengines((102,116))searchengines((157,171)))/2.0(87,187)", ffl.FragInfos[0].ToString(CultureInfo.InvariantCulture)); // LUCENENET specific: use invariant culture, since we are culture-aware
}
public void TestFieldPhraseListIndex2w1wSearch1partial()
{
    makeIndex2w1w();
    FieldQuery fq = new FieldQuery(tq("computer"), true, true);
    FieldTermStack stack = new FieldTermStack(reader, 0, F, fq);
    FieldPhraseList fpl = new FieldPhraseList(stack, fq);
    assertEquals(1, fpl.PhraseList.size());
    assertEquals("computer(1.0)((3,20))", fpl.PhraseList[0].ToString(CultureInfo.InvariantCulture)); // LUCENENET specific: use invariant culture, since we are culture-aware
    assertEquals(3, fpl.PhraseList[0].StartOffset);
    assertEquals(20, fpl.PhraseList[0].EndOffset);
}
public void TestExpandNotFieldMatch()
{
    Query dummy = pqF("DUMMY");
    FieldQuery fq = new FieldQuery(dummy, true, false);

    // f1:"a b",f2:"b c" => f1:"a b",f2:"b c",f1:"a b c"
    ISet<Query> flatQueries = new JCG.HashSet<Query>();
    flatQueries.Add(pq(F1, "a", "b"));
    flatQueries.Add(pq(F2, "b", "c"));
    assertCollectionQueries(fq.Expand(flatQueries), pq(F1, "a", "b"), pq(F2, "b", "c"), pq(F1, "a", "b", "c"));
}
public void Test1PhraseShortMV()
{
    makeIndexShortMV();
    FieldQuery fq = new FieldQuery(tq("d"), true, true);
    FieldTermStack stack = new FieldTermStack(reader, 0, F, fq);
    FieldPhraseList fpl = new FieldPhraseList(stack, fq);
    SimpleFragListBuilder sflb = new SimpleFragListBuilder();
    FieldFragList ffl = sflb.CreateFieldFragList(fpl, 100);
    assertEquals(1, ffl.FragInfos.size());
    assertEquals("subInfos=(d((9,10)))/1.0(0,100)", ffl.FragInfos[0].ToString(CultureInfo.InvariantCulture)); // LUCENENET specific: use invariant culture, since we are culture-aware
}
public void TestUnstoredField()
{
    makeUnstoredIndex();
    FieldQuery fq = new FieldQuery(tq("aaa"), true, true);
    FieldTermStack stack = new FieldTermStack(reader, 0, F, fq);
    FieldPhraseList fpl = new FieldPhraseList(stack, fq);
    SimpleFragListBuilder sflb = new SimpleFragListBuilder();
    FieldFragList ffl = sflb.CreateFieldFragList(fpl, 100);
    SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder();
    assertNull(sfb.CreateFragment(reader, 0, F, ffl));
}
public void Test1PhraseLongMVB()
{
    makeIndexLongMVB();
    FieldQuery fq = new FieldQuery(pqF("sp", "pe", "ee", "ed"), true, true); // "speed" -(2gram)-> "sp","pe","ee","ed"
    FieldTermStack stack = new FieldTermStack(reader, 0, F, fq);
    FieldPhraseList fpl = new FieldPhraseList(stack, fq);
    SimpleFragListBuilder sflb = new SimpleFragListBuilder();
    FieldFragList ffl = sflb.CreateFieldFragList(fpl, 100);
    SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder();
    assertEquals("additional hardware. \nWhen you talk about processing <b>speed</b>, the", sfb.CreateFragment(reader, 0, F, ffl));
}
public void TestCommonTermsQueryHighlight()
{
    Directory dir = NewDirectory();
    IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT,
        new MockAnalyzer(Random, MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET)));
    FieldType type = new FieldType(TextField.TYPE_STORED);
    type.StoreTermVectorOffsets = true;
    type.StoreTermVectorPositions = true;
    type.StoreTermVectors = true;
    type.Freeze();
    String[] texts = {
        "Hello this is a piece of text that is very long and contains too much preamble and the meat is really here which says kennedy has been shot",
        "This piece of text refers to Kennedy at the beginning then has a longer piece of text that is very long in the middle and finally ends with another reference to Kennedy",
        "JFK has been shot",
        "John Kennedy has been shot",
        "This text has a typo in referring to Keneddy",
        "wordx wordy wordz wordx wordy wordx worda wordb wordy wordc",
        "y z x y z a b",
        "lets is a the lets is a the lets is a the lets"
    };
    for (int i = 0; i < texts.Length; i++)
    {
        Document doc = new Document();
        Field field = new Field("field", texts[i], type);
        doc.Add(field);
        writer.AddDocument(doc);
    }

    CommonTermsQuery query = new CommonTermsQuery(Occur.MUST, Occur.SHOULD, 2);
    query.Add(new Term("field", "text"));
    query.Add(new Term("field", "long"));
    query.Add(new Term("field", "very"));

    FastVectorHighlighter highlighter = new FastVectorHighlighter();
    IndexReader reader = DirectoryReader.Open(writer, true);
    IndexSearcher searcher = NewSearcher(reader);
    TopDocs hits = searcher.Search(query, 10);
    assertEquals(2, hits.TotalHits);

    FieldQuery fieldQuery = highlighter.GetFieldQuery(query, reader);
    String[] bestFragments = highlighter.GetBestFragments(fieldQuery, reader, hits.ScoreDocs[0].Doc, "field", 1000, 1);
    assertEquals("This piece of <b>text</b> refers to Kennedy at the beginning then has a longer piece of <b>text</b> that is <b>very</b> <b>long</b> in the middle and finally ends with another reference to Kennedy", bestFragments[0]);

    fieldQuery = highlighter.GetFieldQuery(query, reader);
    bestFragments = highlighter.GetBestFragments(fieldQuery, reader, hits.ScoreDocs[1].Doc, "field", 1000, 1);
    assertEquals("Hello this is a piece of <b>text</b> that is <b>very</b> <b>long</b> and contains too much preamble and the meat is really here which says kennedy has been shot", bestFragments[0]);

    reader.Dispose();
    writer.Dispose();
    dir.Dispose();
}
public void TestMVSeparator()
{
    makeIndexShortMV();
    FieldQuery fq = new FieldQuery(tq("d"), true, true);
    FieldTermStack stack = new FieldTermStack(reader, 0, F, fq);
    FieldPhraseList fpl = new FieldPhraseList(stack, fq);
    SimpleFragListBuilder sflb = new SimpleFragListBuilder();
    FieldFragList ffl = sflb.CreateFieldFragList(fpl, 100);
    SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder();
    sfb.MultiValuedSeparator = '/';
    assertEquals("//a b c//<b>d</b> e", sfb.CreateFragment(reader, 0, F, ffl));
}
public void Test1PhraseLongMV()
{
    makeIndexLongMV();
    FieldQuery fq = new FieldQuery(pqF("search", "engines"), true, true);
    FieldTermStack stack = new FieldTermStack(reader, 0, F, fq);
    FieldPhraseList fpl = new FieldPhraseList(stack, fq);
    SimpleFragListBuilder sflb = new SimpleFragListBuilder();
    FieldFragList ffl = sflb.CreateFieldFragList(fpl, 100);
    SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder();
    assertEquals("customization: The most <b>search engines</b> use only one of these methods. Even the <b>search engines</b> that says they can", sfb.CreateFragment(reader, 0, F, ffl));
}
public void Test1PhraseShortMV()
{
    makeIndexShortMV();
    FieldQuery fq = new FieldQuery(tq("d"), true, true);
    FieldTermStack stack = new FieldTermStack(reader, 0, F, fq);
    FieldPhraseList fpl = new FieldPhraseList(stack, fq);
    SimpleFragListBuilder sflb = new SimpleFragListBuilder();
    FieldFragList ffl = sflb.CreateFieldFragList(fpl, 100);
    SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder();
    // Should we probably be trimming?
    assertEquals(" a b c <b>d</b> e", sfb.CreateFragment(reader, 0, F, ffl));
}
public void Test1PhraseIndexB()
{
    // 01 12 23 34 45 56 67 78 (offsets)
    // bb|bb|ba|ac|cb|ba|ab|bc
    //  0  1  2  3  4  5  6  7 (positions)
    make1d1fIndexB("bbbacbabc");
    FieldQuery fq = new FieldQuery(pqF("ba", "ac"), true, true);
    FieldTermStack stack = new FieldTermStack(reader, 0, F, fq);
    FieldPhraseList fpl = new FieldPhraseList(stack, fq);
    assertEquals(1, fpl.PhraseList.size());
    assertEquals("baac(1.0)((2,5))", fpl.PhraseList[0].ToString(CultureInfo.InvariantCulture)); // LUCENENET specific: use invariant culture, since we are culture-aware
}