/// <summary>
/// Builds a frag list for a single 19-character term, using the builder's
/// <c>minFragCharSize</c> as the fragment size, and checks that exactly one
/// frag info with the expected string form is produced.
/// </summary>
public void TestSmallerFragSizeThanTermQuery()
{
    SimpleFragListBuilder sflb = new SimpleFragListBuilder();
    FieldFragList ffl = sflb.CreateFieldFragList(
        fpl(new TermQuery(new Term(F, "abcdefghijklmnopqrs")), "abcdefghijklmnopqrs"),
        sflb.minFragCharSize);

    assertEquals(1, ffl.FragInfos.size());

    // Format with the invariant culture so the "1.0" in the expected text does not
    // depend on the decimal separator of whatever culture the test happens to run under.
    assertEquals(
        "subInfos=(abcdefghijklmnopqrs((0,19)))/1.0(0,19)",
        ffl.FragInfos[0].ToString(System.Globalization.CultureInfo.InvariantCulture));
}
/// <summary>
/// A term query whose single term is indexed with the builder's
/// <c>minFragCharSize</c> as fragment size must yield exactly one frag info
/// with the expected invariant-culture string form.
/// </summary>
public void TestSmallerFragSizeThanTermQuery()
{
    const string text = "abcdefghijklmnopqrs";
    var builder = new SimpleFragListBuilder();
    var termQuery = new TermQuery(new Term(F, text));

    FieldFragList fragList = builder.CreateFieldFragList(fpl(termQuery, text), builder.minFragCharSize);

    assertEquals(1, fragList.FragInfos.size());

    // LUCENENET specific: use invariant culture, since we are culture-aware
    string actual = fragList.FragInfos[0].ToString(CultureInfo.InvariantCulture);
    assertEquals("subInfos=(abcdefghijklmnopqrs((0,19)))/1.0(0,19)", actual);
}
/// <summary>
/// Builds a frag list with fragment size 100 for a term query on "a" against
/// the single value "a" and checks that exactly one frag info with the
/// expected string form is produced.
/// </summary>
public void Test1TermIndex()
{
    SimpleFragListBuilder sflb = new SimpleFragListBuilder();
    FieldFragList ffl = sflb.CreateFieldFragList(fpl(new TermQuery(new Term(F, "a")), "a"), 100);

    assertEquals(1, ffl.FragInfos.size());

    // Format with the invariant culture so the "1.0" in the expected text does not
    // depend on the decimal separator of the current culture.
    assertEquals(
        "subInfos=(a((0,1)))/1.0(0,100)",
        ffl.FragInfos[0].ToString(System.Globalization.CultureInfo.InvariantCulture));
}
/// <summary>
/// One matching term with fragment size 100 must produce a single frag info
/// whose invariant-culture string form matches the expected text.
/// </summary>
public void Test1TermIndex()
{
    var builder = new SimpleFragListBuilder();
    var termQuery = new TermQuery(new Term(F, "a"));

    FieldFragList fragList = builder.CreateFieldFragList(fpl(termQuery, "a"), 100);

    assertEquals(1, fragList.FragInfos.size());

    // LUCENENET specific: use invariant culture, since we are culture-aware
    string actual = fragList.FragInfos[0].ToString(CultureInfo.InvariantCulture);
    assertEquals("subInfos=(a((0,1)))/1.0(0,100)", actual);
}
/// <summary>
/// Uses <c>SingleFragListBuilder</c> with a term query on "a" over four
/// indexed values and checks that exactly one frag info with the expected
/// string form is produced.
/// </summary>
public void TestLongFieldFragList()
{
    SingleFragListBuilder sflb = new SingleFragListBuilder();
    FieldFragList ffl = sflb.CreateFieldFragList(
        fpl(
            new TermQuery(new Term(F, "a")),
            "a b c d",
            "a b c d e f g h i",
            "j k l m n o p q r s t u v w x y z a b c",
            "d e f g"),
        100);

    assertEquals(1, ffl.FragInfos.size());

    // Format with the invariant culture so the "3.0" in the expected text does not
    // depend on the decimal separator of the current culture.
    assertEquals(
        "subInfos=(a((0,1))a((8,9))a((60,61)))/3.0(0,2147483647)",
        ffl.FragInfos[0].ToString(System.Globalization.CultureInfo.InvariantCulture));
}
/// <summary>
/// Returns the best fragment for a document field, using the supplied
/// builders, tags and encoder.
/// </summary>
/// <param name="fieldQuery"><see cref="FieldQuery"/> object</param>
/// <param name="reader"><see cref="IndexReader"/> of the index</param>
/// <param name="docId">id of the document to highlight</param>
/// <param name="fieldName">name of the document field to highlight</param>
/// <param name="fragCharSize">fragment length, in chars</param>
/// <param name="fragListBuilder"><see cref="IFragListBuilder"/> used to build the frag list</param>
/// <param name="fragmentsBuilder"><see cref="IFragmentsBuilder"/> used to turn the frag list into text</param>
/// <param name="preTags">pre-tags used to highlight terms</param>
/// <param name="postTags">post-tags used to highlight terms</param>
/// <param name="encoder">encoder that generates the encoded text</param>
/// <returns>the best fragment (snippet) string</returns>
/// <exception cref="IOException">If there is a low-level I/O error</exception>
public string GetBestFragment(FieldQuery fieldQuery, IndexReader reader, int docId,
    string fieldName, int fragCharSize,
    IFragListBuilder fragListBuilder, IFragmentsBuilder fragmentsBuilder,
    string[] preTags, string[] postTags, IEncoder encoder)
{
    // Step 1: collect the candidate fragments for this field.
    FieldFragList fragList = GetFieldFragList(
        fragListBuilder, fieldQuery, reader, docId, fieldName, fragCharSize);

    // Step 2: let the fragments builder render the single best one.
    return fragmentsBuilder.CreateFragment(
        reader, docId, fieldName, fragList, preTags, postTags, encoder);
}
/// <summary>
/// Creates a fragment with custom pre/post tags and a <c>SimpleHTMLEncoder</c>
/// and compares it with the expected text.
/// </summary>
public void TestTagsAndEncoder()
{
    FieldFragList fragList = Ffl(new TermQuery(new Term(F, "a")), "<h1> a </h1>");
    var fragmentsBuilder = new SimpleFragmentsBuilder();
    string[] openTags = new[] { "[" };
    string[] closeTags = new[] { "]" };

    // NOTE(review): an HTML encoder is passed, yet the expected text contains raw
    // <h1> markup - confirm the expected literal was not HTML-unescaped by mistake.
    string fragment = fragmentsBuilder.CreateFragment(reader, 0, F, fragList, openTags, closeTags, new SimpleHTMLEncoder());
    assertEquals("<h1> [a] </h1>", fragment);
}
/// <summary>
/// Returns the best fragments. Matches are scanned from <paramref name="matchedFields"/>
/// and turned into fragments against <paramref name="storedField"/>. The highlighting may
/// not make sense if <paramref name="matchedFields"/> has matches with offsets that do not
/// correspond to features in <paramref name="storedField"/>. It will outright throw an
/// <see cref="IndexOutOfRangeException"/> if <paramref name="matchedFields"/> produces
/// offsets outside of <paramref name="storedField"/>. It is therefore advisable that all
/// <paramref name="matchedFields"/> share the same source as <paramref name="storedField"/>,
/// or are at least a prefix of it.
/// </summary>
/// <param name="fieldQuery"><see cref="FieldQuery"/> object</param>
/// <param name="reader"><see cref="IndexReader"/> of the index</param>
/// <param name="docId">id of the document to highlight</param>
/// <param name="storedField">field of the document that stores the text</param>
/// <param name="matchedFields">fields of the document to scan for matches</param>
/// <param name="fragCharSize">fragment length, in chars</param>
/// <param name="maxNumFragments">maximum number of fragments</param>
/// <param name="fragListBuilder"><see cref="IFragListBuilder"/> used to build the frag list</param>
/// <param name="fragmentsBuilder"><see cref="IFragmentsBuilder"/> used to turn the frag list into text</param>
/// <param name="preTags">pre-tags used to highlight terms</param>
/// <param name="postTags">post-tags used to highlight terms</param>
/// <param name="encoder">encoder that generates the encoded text</param>
/// <returns>
/// The created fragments, or <c>null</c> when no fragments were created.
/// The array may hold fewer than <paramref name="maxNumFragments"/> entries.
/// </returns>
/// <exception cref="IOException">If there is a low-level I/O error</exception>
public string[] GetBestFragments(FieldQuery fieldQuery, IndexReader reader, int docId,
    string storedField, ISet<string> matchedFields, int fragCharSize, int maxNumFragments,
    IFragListBuilder fragListBuilder, IFragmentsBuilder fragmentsBuilder,
    string[] preTags, string[] postTags, IEncoder encoder)
{
    // Matches come from the matched fields...
    FieldFragList fragList = GetFieldFragList(
        fragListBuilder, fieldQuery, reader, docId, matchedFields, fragCharSize);

    // ...while the fragment text is rendered against the stored field.
    return fragmentsBuilder.CreateFragments(
        reader, docId, storedField, fragList, maxNumFragments, preTags, postTags, encoder);
}
/// <summary>
/// Creates a fragment for a single matching term, first with a default
/// <c>SimpleFragmentsBuilder</c> and then with one constructed with "[" / "]" tags.
/// </summary>
public void Test1TermIndex()
{
    FieldFragList fragList = Ffl(new TermQuery(new Term(F, "a")), "a");

    var defaultBuilder = new SimpleFragmentsBuilder();
    assertEquals("<b>a</b>", defaultBuilder.CreateFragment(reader, 0, F, fragList));

    // change tags
    var bracketBuilder = new SimpleFragmentsBuilder(new string[] { "[" }, new string[] { "]" });
    assertEquals("[a]", bracketBuilder.CreateFragment(reader, 0, F, fragList));
}
/// <summary>
/// Requests up to three fragments and checks that two are returned with the
/// expected highlighted text.
/// </summary>
public void Test2Frags()
{
    string[] expected =
    {
        "<b>a</b> b b b b b b b b b b",
        "b b <b>a</b> b <b>a</b> b",
    };

    FieldFragList fragList = Ffl(new TermQuery(new Term(F, "a")), "a b b b b b b b b b b b a b a b");
    var fragmentsBuilder = new SimpleFragmentsBuilder();
    string[] fragments = fragmentsBuilder.CreateFragments(reader, 0, F, fragList, 3);

    // 3 snippets requested, but should be 2
    assertEquals(expected.Length, fragments.Length);
    for (int i = 0; i < expected.Length; i++)
    {
        assertEquals(expected[i], fragments[i]);
    }
}
/// <summary>
/// Highlights the 2-gram phrase for "speed" against the index created by
/// <c>makeIndexLongMVB</c> and checks the resulting fragment text.
/// </summary>
public void Test1PhraseLongMVB()
{
    makeIndexLongMVB();

    // "speed" -(2gram)-> "sp","pe","ee","ed"
    var fieldQuery = new FieldQuery(pqF("sp", "pe", "ee", "ed"), true, true);
    var phraseList = new FieldPhraseList(new FieldTermStack(reader, 0, F, fieldQuery), fieldQuery);
    var fragListBuilder = new SimpleFragListBuilder();
    FieldFragList fragList = fragListBuilder.CreateFieldFragList(phraseList, 100);
    var fragmentsBuilder = new SimpleFragmentsBuilder();

    string fragment = fragmentsBuilder.CreateFragment(reader, 0, F, fragList);
    assertEquals("additional hardware. \nWhen you talk about processing <b>speed</b>, the", fragment);
}
/// <summary>
/// Against the index created by <c>makeUnstoredIndex</c>, creating a fragment
/// is expected to return <c>null</c>.
/// </summary>
public void TestUnstoredField()
{
    makeUnstoredIndex();

    var fieldQuery = new FieldQuery(tq("aaa"), true, true);
    var phraseList = new FieldPhraseList(new FieldTermStack(reader, 0, F, fieldQuery), fieldQuery);
    var fragListBuilder = new SimpleFragListBuilder();
    FieldFragList fragList = fragListBuilder.CreateFieldFragList(phraseList, 100);
    var fragmentsBuilder = new SimpleFragmentsBuilder();

    assertNull(fragmentsBuilder.CreateFragment(reader, 0, F, fragList));
}
/// <summary>
/// Builds a frag list (fragment size 100) for the 2-gram phrase for "speed"
/// against the index created by <c>makeIndexLongMVB</c> and checks that
/// exactly one frag info with the expected string form is produced.
/// </summary>
public void Test1PhraseLongMVB()
{
    makeIndexLongMVB();

    // "speed" -(2gram)-> "sp","pe","ee","ed"
    FieldQuery fq = new FieldQuery(pqF("sp", "pe", "ee", "ed"), true, true);
    FieldTermStack stack = new FieldTermStack(reader, 0, F, fq);
    FieldPhraseList fpl = new FieldPhraseList(stack, fq);
    SimpleFragListBuilder sflb = new SimpleFragListBuilder();
    FieldFragList ffl = sflb.CreateFieldFragList(fpl, 100);

    assertEquals(1, ffl.FragInfos.size());

    // Format with the invariant culture so the "1.0" in the expected text does not
    // depend on the decimal separator of the current culture.
    assertEquals(
        "subInfos=(sppeeeed((88,93)))/1.0(41,141)",
        ffl.FragInfos[0].ToString(System.Globalization.CultureInfo.InvariantCulture));
}
/// <summary>
/// Builds a frag list (fragment size 100) for the phrase "search engines"
/// against the index created by <c>makeIndexLongMV</c> and checks the single
/// resulting frag info's invariant-culture string form.
/// </summary>
public void Test1PhraseLongMV()
{
    makeIndexLongMV();

    var fieldQuery = new FieldQuery(pqF("search", "engines"), true, true);
    var phraseList = new FieldPhraseList(new FieldTermStack(reader, 0, F, fieldQuery), fieldQuery);
    var builder = new SimpleFragListBuilder();
    FieldFragList fragList = builder.CreateFieldFragList(phraseList, 100);

    assertEquals(1, fragList.FragInfos.size());

    // LUCENENET specific: use invariant culture, since we are culture-aware
    string actual = fragList.FragInfos[0].ToString(CultureInfo.InvariantCulture);
    assertEquals("subInfos=(searchengines((102,116))searchengines((157,171)))/2.0(87,187)", actual);
}
/// <summary>
/// Builds a frag list (fragment size 100) for the term "d" against the index
/// created by <c>makeIndexShortMV</c> and checks that exactly one frag info
/// with the expected string form is produced.
/// </summary>
public void Test1PhraseShortMV()
{
    makeIndexShortMV();

    FieldQuery fq = new FieldQuery(tq("d"), true, true);
    FieldTermStack stack = new FieldTermStack(reader, 0, F, fq);
    FieldPhraseList fpl = new FieldPhraseList(stack, fq);
    SimpleFragListBuilder sflb = new SimpleFragListBuilder();
    FieldFragList ffl = sflb.CreateFieldFragList(fpl, 100);

    assertEquals(1, ffl.FragInfos.size());

    // Format with the invariant culture so the "1.0" in the expected text does not
    // depend on the decimal separator of the current culture.
    assertEquals(
        "subInfos=(d((9,10)))/1.0(0,100)",
        ffl.FragInfos[0].ToString(System.Globalization.CultureInfo.InvariantCulture));
}
/// <summary>
/// Term "d" against the index created by <c>makeIndexShortMV</c>, fragment
/// size 100: expects a single frag info with the given invariant-culture
/// string form.
/// </summary>
public void Test1PhraseShortMV()
{
    makeIndexShortMV();

    var fieldQuery = new FieldQuery(tq("d"), true, true);
    var phraseList = new FieldPhraseList(new FieldTermStack(reader, 0, F, fieldQuery), fieldQuery);
    var builder = new SimpleFragListBuilder();
    FieldFragList fragList = builder.CreateFieldFragList(phraseList, 100);

    assertEquals(1, fragList.FragInfos.size());

    // LUCENENET specific: use invariant culture, since we are culture-aware
    string actual = fragList.FragInfos[0].ToString(CultureInfo.InvariantCulture);
    assertEquals("subInfos=(d((9,10)))/1.0(0,100)", actual);
}
/// <summary>
/// Creates a single fragment by asking <c>CreateFragments</c> for at most one
/// fragment and returning the first entry.
/// </summary>
/// <returns>
/// The first fragment, or <c>null</c> when <c>CreateFragments</c> returns
/// <c>null</c> or an empty array.
/// </returns>
public virtual string CreateFragment(IndexReader reader, int docId, string fieldName,
    FieldFragList fieldFragList, string[] preTags, string[] postTags, IEncoder encoder)
{
    string[] candidates = CreateFragments(
        reader, docId, fieldName, fieldFragList, 1, preTags, postTags, encoder);

    bool hasFragment = candidates != null && candidates.Length > 0;
    return hasFragment ? candidates[0] : null;
}
/// <summary>
/// 2-gram phrase for "speed" against the index created by
/// <c>makeIndexLongMVB</c>, fragment size 100: expects a single frag info
/// with the given invariant-culture string form.
/// </summary>
public void Test1PhraseLongMVB()
{
    makeIndexLongMVB();

    // "speed" -(2gram)-> "sp","pe","ee","ed"
    var fieldQuery = new FieldQuery(pqF("sp", "pe", "ee", "ed"), true, true);
    var phraseList = new FieldPhraseList(new FieldTermStack(reader, 0, F, fieldQuery), fieldQuery);
    var builder = new SimpleFragListBuilder();
    FieldFragList fragList = builder.CreateFieldFragList(phraseList, 100);

    assertEquals(1, fragList.FragInfos.size());

    // LUCENENET specific: use invariant culture, since we are culture-aware
    string actual = fragList.FragInfos[0].ToString(CultureInfo.InvariantCulture);
    assertEquals("subInfos=(sppeeeed((88,93)))/1.0(41,141)", actual);
}
/// <summary>
/// Builds a frag list (fragment size 100) for the phrase "search engines"
/// against the index created by <c>makeIndexLongMV</c> and checks that
/// exactly one frag info with the expected string form is produced.
/// </summary>
public void Test1PhraseLongMV()
{
    makeIndexLongMV();

    FieldQuery fq = new FieldQuery(pqF("search", "engines"), true, true);
    FieldTermStack stack = new FieldTermStack(reader, 0, F, fq);
    FieldPhraseList fpl = new FieldPhraseList(stack, fq);
    SimpleFragListBuilder sflb = new SimpleFragListBuilder();
    FieldFragList ffl = sflb.CreateFieldFragList(fpl, 100);

    assertEquals(1, ffl.FragInfos.size());

    // Format with the invariant culture so the "2.0" in the expected text does not
    // depend on the decimal separator of the current culture.
    assertEquals(
        "subInfos=(searchengines((102,116))searchengines((157,171)))/2.0(87,187)",
        ffl.FragInfos[0].ToString(System.Globalization.CultureInfo.InvariantCulture));
}
/// <summary>
/// Highlights the phrase "search engines" against the index created by
/// <c>makeIndexLongMV</c> and checks the resulting fragment text.
/// </summary>
public void Test1PhraseLongMV()
{
    makeIndexLongMV();

    var fieldQuery = new FieldQuery(pqF("search", "engines"), true, true);
    var phraseList = new FieldPhraseList(new FieldTermStack(reader, 0, F, fieldQuery), fieldQuery);
    var fragListBuilder = new SimpleFragListBuilder();
    FieldFragList fragList = fragListBuilder.CreateFieldFragList(phraseList, 100);
    var fragmentsBuilder = new SimpleFragmentsBuilder();

    string fragment = fragmentsBuilder.CreateFragment(reader, 0, F, fragList);
    assertEquals("customization: The most <b>search engines</b> use only one of these methods. Even the <b>search engines</b> that says they can", fragment);
}
/// <summary>
/// Highlights the term "d" against the index created by
/// <c>makeIndexShortMV</c> and checks the resulting fragment text.
/// </summary>
public void Test1PhraseShortMV()
{
    makeIndexShortMV();

    var fieldQuery = new FieldQuery(tq("d"), true, true);
    var phraseList = new FieldPhraseList(new FieldTermStack(reader, 0, F, fieldQuery), fieldQuery);
    var fragListBuilder = new SimpleFragListBuilder();
    FieldFragList fragList = fragListBuilder.CreateFieldFragList(phraseList, 100);
    var fragmentsBuilder = new SimpleFragmentsBuilder();

    // Should we probably be trimming?
    string fragment = fragmentsBuilder.CreateFragment(reader, 0, F, fragList);
    assertEquals("  a b c  <b>d</b> e", fragment);
}
/// <summary>
/// Sets <c>MultiValuedSeparator</c> to '/' on the fragments builder and
/// checks the fragment text produced for the term "d" against the index
/// created by <c>makeIndexShortMV</c>.
/// </summary>
public void TestMVSeparator()
{
    makeIndexShortMV();

    var fieldQuery = new FieldQuery(tq("d"), true, true);
    var phraseList = new FieldPhraseList(new FieldTermStack(reader, 0, F, fieldQuery), fieldQuery);
    var fragListBuilder = new SimpleFragListBuilder();
    FieldFragList fragList = fragListBuilder.CreateFieldFragList(phraseList, 100);

    var fragmentsBuilder = new SimpleFragmentsBuilder { MultiValuedSeparator = '/' };

    string fragment = fragmentsBuilder.CreateFragment(reader, 0, F, fragList);
    assertEquals("//a b c//<b>d</b> e", fragment);
}
/// <summary>
/// Phrase query "a b" with slop 1 against "a c b", fragment size 20: expects
/// a single frag info with the given invariant-culture string form.
/// </summary>
public void TestPhraseQuerySlop()
{
    var builder = new SimpleFragListBuilder();

    var slopQuery = new PhraseQuery();
    slopQuery.Slop = 1;
    slopQuery.Add(new Term(F, "a"));
    slopQuery.Add(new Term(F, "b"));

    FieldFragList fragList = builder.CreateFieldFragList(fpl(slopQuery, "a c b"), 20);

    assertEquals(1, fragList.FragInfos.size());

    // LUCENENET specific: use invariant culture, since we are culture-aware
    string actual = fragList.FragInfos[0].ToString(CultureInfo.InvariantCulture);
    assertEquals("subInfos=(ab((0,1)(4,5)))/1.0(0,20)", actual);
}
/// <summary>
/// Builds a frag list (fragment size 20) for the phrase query "a b" with
/// slop 1 against "a c b" and checks that exactly one frag info with the
/// expected string form is produced.
/// </summary>
public void TestPhraseQuerySlop()
{
    SimpleFragListBuilder sflb = new SimpleFragListBuilder();

    PhraseQuery phraseQuery = new PhraseQuery();
    phraseQuery.Slop = 1;
    phraseQuery.Add(new Term(F, "a"));
    phraseQuery.Add(new Term(F, "b"));

    FieldFragList ffl = sflb.CreateFieldFragList(fpl(phraseQuery, "a c b"), 20);

    assertEquals(1, ffl.FragInfos.size());

    // Format with the invariant culture so the "1.0" in the expected text does not
    // depend on the decimal separator of the current culture.
    assertEquals(
        "subInfos=(ab((0,1)(4,5)))/1.0(0,20)",
        ffl.FragInfos[0].ToString(System.Globalization.CultureInfo.InvariantCulture));
}
/// <summary>
/// Boolean query ("a" OR "c", both SHOULD): requests three fragments and
/// checks that all three come back with the expected highlighted text.
/// </summary>
public void Test3Frags()
{
    string[] expected =
    {
        "<b>a</b> b b b b b b b b b b",
        "b b <b>a</b> b <b>a</b> b b b b b c",
        "<b>c</b> <b>a</b> <b>a</b> b b",
    };

    var eitherTerm = new BooleanQuery();
    eitherTerm.Add(new TermQuery(new Term(F, "a")), Occur.SHOULD);
    eitherTerm.Add(new TermQuery(new Term(F, "c")), Occur.SHOULD);

    FieldFragList fragList = Ffl(eitherTerm, "a b b b b b b b b b b b a b a b b b b b c a a b b");
    var fragmentsBuilder = new SimpleFragmentsBuilder();
    string[] fragments = fragmentsBuilder.CreateFragments(reader, 0, F, fragList, 3);

    assertEquals(expected.Length, fragments.Length);
    for (int i = 0; i < expected.Length; i++)
    {
        assertEquals(expected[i], fragments[i]);
    }
}
/// <summary>
/// Runs a term query on "a" against three texts that each contain two
/// matches and checks, for each text, that exactly one frag info with the
/// expected string form is produced.
/// </summary>
public void Test2TermsIndex1Frag()
{
    // All string forms are produced with the invariant culture so the "2.0" in the
    // expected text does not depend on the decimal separator of the current culture.
    SimpleFragListBuilder sflb = new SimpleFragListBuilder();

    FieldFragList ffl = sflb.CreateFieldFragList(fpl(new TermQuery(new Term(F, "a")), "a a"), 100);
    assertEquals(1, ffl.FragInfos.size());
    assertEquals(
        "subInfos=(a((0,1))a((2,3)))/2.0(0,100)",
        ffl.FragInfos[0].ToString(System.Globalization.CultureInfo.InvariantCulture));

    ffl = sflb.CreateFieldFragList(fpl(new TermQuery(new Term(F, "a")), "a b b b b b b b b a"), 20);
    assertEquals(1, ffl.FragInfos.size());
    assertEquals(
        "subInfos=(a((0,1))a((18,19)))/2.0(0,20)",
        ffl.FragInfos[0].ToString(System.Globalization.CultureInfo.InvariantCulture));

    ffl = sflb.CreateFieldFragList(fpl(new TermQuery(new Term(F, "a")), "b b b b a b b b b a"), 20);
    assertEquals(1, ffl.FragInfos.size());
    assertEquals(
        "subInfos=(a((8,9))a((18,19)))/2.0(4,24)",
        ffl.FragInfos[0].ToString(System.Globalization.CultureInfo.InvariantCulture));
}
/// <summary>
/// Three texts with two matches of "a" each: every one must yield a single
/// frag info with the given invariant-culture string form.
/// </summary>
public void Test2TermsIndex1Frag()
{
    var builder = new SimpleFragListBuilder();

    // LUCENENET specific: use invariant culture, since we are culture-aware
    FieldFragList fragList = builder.CreateFieldFragList(fpl(new TermQuery(new Term(F, "a")), "a a"), 100);
    assertEquals(1, fragList.FragInfos.size());
    string first = fragList.FragInfos[0].ToString(CultureInfo.InvariantCulture);
    assertEquals("subInfos=(a((0,1))a((2,3)))/2.0(0,100)", first);

    // LUCENENET specific: use invariant culture, since we are culture-aware
    fragList = builder.CreateFieldFragList(fpl(new TermQuery(new Term(F, "a")), "a b b b b b b b b a"), 20);
    assertEquals(1, fragList.FragInfos.size());
    string second = fragList.FragInfos[0].ToString(CultureInfo.InvariantCulture);
    assertEquals("subInfos=(a((0,1))a((18,19)))/2.0(0,20)", second);

    // LUCENENET specific: use invariant culture, since we are culture-aware
    fragList = builder.CreateFieldFragList(fpl(new TermQuery(new Term(F, "a")), "b b b b a b b b b a"), 20);
    assertEquals(1, fragList.FragInfos.size());
    string third = fragList.FragInfos[0].ToString(CultureInfo.InvariantCulture);
    assertEquals("subInfos=(a((8,9))a((18,19)))/2.0(4,24)", third);
}
/// <summary>
/// Two-term phrase query with the builder's <c>minFragCharSize</c> as
/// fragment size: expects a single frag info with the given
/// invariant-culture string form. Prints the frag info when
/// <c>Verbose</c> is set.
/// </summary>
public void TestSmallerFragSizeThanPhraseQuery()
{
    var builder = new SimpleFragListBuilder();

    var phrase = new PhraseQuery();
    phrase.Add(new Term(F, "abcdefgh"));
    phrase.Add(new Term(F, "jklmnopqrs"));

    FieldFragList fragList = builder.CreateFieldFragList(fpl(phrase, "abcdefgh   jklmnopqrs"), builder.minFragCharSize);

    assertEquals(1, fragList.FragInfos.size());

    if (Verbose)
    {
        // LUCENENET specific: use invariant culture, since we are culture-aware
        Console.WriteLine(fragList.FragInfos[0].ToString(CultureInfo.InvariantCulture));
    }

    // LUCENENET specific: use invariant culture, since we are culture-aware
    string actual = fragList.FragInfos[0].ToString(CultureInfo.InvariantCulture);
    assertEquals("subInfos=(abcdefghjklmnopqrs((0,21)))/1.0(0,21)", actual);
}
/// <summary>
/// Builds a frag list for a two-term phrase query, using the builder's
/// <c>minFragCharSize</c> as the fragment size, and checks that exactly one
/// frag info with the expected string form is produced. Prints the frag info
/// when <c>VERBOSE</c> is set.
/// </summary>
public void TestSmallerFragSizeThanPhraseQuery()
{
    SimpleFragListBuilder sflb = new SimpleFragListBuilder();

    PhraseQuery phraseQuery = new PhraseQuery();
    phraseQuery.Add(new Term(F, "abcdefgh"));
    phraseQuery.Add(new Term(F, "jklmnopqrs"));

    FieldFragList ffl = sflb.CreateFieldFragList(fpl(phraseQuery, "abcdefgh   jklmnopqrs"), sflb.minFragCharSize);

    assertEquals(1, ffl.FragInfos.size());

    // Format with the invariant culture so the "1.0" in the expected text (and in the
    // verbose output) does not depend on the decimal separator of the current culture.
    if (VERBOSE)
    {
        Console.WriteLine(ffl.FragInfos[0].ToString(System.Globalization.CultureInfo.InvariantCulture));
    }

    assertEquals(
        "subInfos=(abcdefghjklmnopqrs((0,21)))/1.0(0,21)",
        ffl.FragInfos[0].ToString(System.Globalization.CultureInfo.InvariantCulture));
}
/// <summary>
/// Runs a term query on "a" with fragment size 20 against three texts and
/// checks, for each text, that two frag infos with the expected string forms
/// are produced.
/// </summary>
public void Test2TermsIndex2Frags()
{
    // All string forms are produced with the invariant culture so the "1.0" in the
    // expected text does not depend on the decimal separator of the current culture.
    SimpleFragListBuilder sflb = new SimpleFragListBuilder();

    FieldFragList ffl = sflb.CreateFieldFragList(fpl(new TermQuery(new Term(F, "a")), "a b b b b b b b b b b b b b a"), 20);
    assertEquals(2, ffl.FragInfos.size());
    assertEquals(
        "subInfos=(a((0,1)))/1.0(0,20)",
        ffl.FragInfos[0].ToString(System.Globalization.CultureInfo.InvariantCulture));
    assertEquals(
        "subInfos=(a((28,29)))/1.0(20,40)",
        ffl.FragInfos[1].ToString(System.Globalization.CultureInfo.InvariantCulture));

    ffl = sflb.CreateFieldFragList(fpl(new TermQuery(new Term(F, "a")), "a b b b b b b b b b b b b a"), 20);
    assertEquals(2, ffl.FragInfos.size());
    assertEquals(
        "subInfos=(a((0,1)))/1.0(0,20)",
        ffl.FragInfos[0].ToString(System.Globalization.CultureInfo.InvariantCulture));
    assertEquals(
        "subInfos=(a((26,27)))/1.0(20,40)",
        ffl.FragInfos[1].ToString(System.Globalization.CultureInfo.InvariantCulture));

    ffl = sflb.CreateFieldFragList(fpl(new TermQuery(new Term(F, "a")), "a b b b b b b b b b a"), 20);
    assertEquals(2, ffl.FragInfos.size());
    assertEquals(
        "subInfos=(a((0,1)))/1.0(0,20)",
        ffl.FragInfos[0].ToString(System.Globalization.CultureInfo.InvariantCulture));
    assertEquals(
        "subInfos=(a((20,21)))/1.0(20,40)",
        ffl.FragInfos[1].ToString(System.Globalization.CultureInfo.InvariantCulture));
}