/// <summary>
/// Walks the phrases of <paramref name="fieldPhraseList"/> in list order and groups them
/// into fragments that are at least <paramref name="fragCharSize"/> characters wide.
/// </summary>
/// <param name="fieldPhraseList">Source of the phrases; its <c>phraseList</c> is enumerated once.</param>
/// <param name="fragCharSize">Requested fragment width in characters; must be at least <c>MIN_FRAG_CHAR_SIZE</c>.</param>
/// <returns>A new <c>FieldFragList</c> holding one entry per fragment that was built.</returns>
/// <exception cref="ArgumentException">Thrown when <paramref name="fragCharSize"/> is below <c>MIN_FRAG_CHAR_SIZE</c>.</exception>
public FieldFragList CreateFieldFragList(FieldPhraseList fieldPhraseList, int fragCharSize)
{
    if (fragCharSize < MIN_FRAG_CHAR_SIZE)
    {
        throw new ArgumentException("fragCharSize(" + fragCharSize + ") is too small. It must be " + MIN_FRAG_CHAR_SIZE + " or higher.");
    }

    FieldFragList ffl = new FieldFragList(fragCharSize);
    List<WeightedPhraseInfo> wpil = new List<WeightedPhraseInfo>();
    LinkedList<WeightedPhraseInfo>.Enumerator ite = fieldPhraseList.phraseList.GetEnumerator();
    WeightedPhraseInfo phraseInfo = null;
    int startOffset = 0;

    // True only while phraseInfo holds an element that was fetched from the enumerator
    // but has NOT been placed into a fragment yet (it must be handled by the next pass).
    bool taken = false;

    while (true)
    {
        if (!taken)
        {
            if (!ite.MoveNext())
            {
                break;
            }
            phraseInfo = ite.Current;
        }
        taken = false;
        if (phraseInfo == null)
        {
            break;
        }

        // if the phrase violates the border of previous fragment, discard it and try next phrase
        if (phraseInfo.StartOffset < startOffset)
        {
            if (phraseInfo.EndOffset < startOffset)
            {
                continue;
            }
            // The phrase straddles the previous border: let the next fragment begin at the phrase itself.
            startOffset = phraseInfo.StartOffset;
        }

        wpil.Clear();
        wpil.Add(phraseInfo);

        // Start MARGIN characters before the phrase, but never before the current border.
        int st = phraseInfo.StartOffset - MARGIN < startOffset ? startOffset : phraseInfo.StartOffset - MARGIN;
        int en = st + fragCharSize;
        if (phraseInfo.EndOffset > en)
        {
            // A phrase longer than the fragment stretches the fragment to cover it.
            en = phraseInfo.EndOffset;
        }
        startOffset = en;

        // Absorb every following phrase that still ends inside this fragment.
        while (ite.MoveNext())
        {
            phraseInfo = ite.Current;
            if (phraseInfo == null || phraseInfo.EndOffset > en)
            {
                // Fetched but not consumed: hand it to the next outer pass.
                // (Previously the flag was also left set for a phrase that HAD been added when
                // the enumerator ran out, so a phrase ending exactly at 'en' was emitted a second
                // time in a duplicate fragment.)
                taken = true;
                break;
            }
            wpil.Add(phraseInfo);
        }

        ffl.Add(st, en, wpil);
    }

    return ffl;
}
/// <summary>
/// Uses the smallest permitted fragment size with a 19-character term and expects a single
/// fragment whose string form reports the range (0,19).
/// </summary>
public void TestSmallerFragSizeThanTermQuery()
{
    SimpleFragListBuilder builder = new SimpleFragListBuilder();

    FieldFragList fragList = builder.CreateFieldFragList(
        fpl("abcdefghijklmnopqrs", "abcdefghijklmnopqrs"),
        SimpleFragListBuilder.MIN_FRAG_CHAR_SIZE);

    string expected = "subInfos=(abcdefghijklmnopqrs((0,19)))/1.0(0,19)";
    Assert.AreEqual(1, fragList.fragInfos.Count);
    Assert.AreEqual(expected, fragList.fragInfos[0].ToString());
}
/// <summary>
/// Builds a frag list for the <c>fpl("a", "a")</c> input with a fragment size of 100 and
/// expects exactly one fragment with the string form checked below.
/// </summary>
public void Test1TermIndex()
{
    SimpleFragListBuilder builder = new SimpleFragListBuilder();

    FieldFragList fragList = builder.CreateFieldFragList(fpl("a", "a"), 100);

    string expected = "subInfos=(a((0,1)))/1.0(0,100)";
    Assert.AreEqual(1, fragList.fragInfos.Count);
    Assert.AreEqual(expected, fragList.fragInfos[0].ToString());
}
/// <summary>
/// Builds a frag list for the sloppy phrase query <c>"a b"~1</c> against the text "a c b"
/// with a fragment size of 20 and expects one fragment with the string form checked below.
/// </summary>
public void TestPhraseQuerySlop()
{
    SimpleFragListBuilder builder = new SimpleFragListBuilder();

    FieldFragList fragList = builder.CreateFieldFragList(fpl("\"a b\"~1", "a c b"), 20);

    string expected = "subInfos=(ab((0,1)(4,5)))/1.0(0,20)";
    Assert.AreEqual(1, fragList.fragInfos.Count);
    Assert.AreEqual(expected, fragList.fragInfos[0].ToString());
}
/// <summary>
/// Uses the smallest permitted fragment size with a phrase query and expects a single
/// fragment whose string form matches the value checked below.
/// </summary>
public void TestSmallerFragSizeThanPhraseQuery()
{
    SimpleFragListBuilder sflb = new SimpleFragListBuilder();

    FieldFragList ffl = sflb.CreateFieldFragList(
        fpl("\"abcdefgh jklmnopqrs\"", "abcdefgh jklmnopqrs"),
        SimpleFragListBuilder.MIN_FRAG_CHAR_SIZE);

    // The leftover debugging Console.WriteLine was removed: the assertion below already
    // reports the actual value on failure, and tests should not write to stdout.
    Assert.AreEqual(1, ffl.fragInfos.Count);
    Assert.AreEqual("subInfos=(abcdefghjklmnopqrs((0,21)))/1.0(0,21)", ffl.fragInfos[0].ToString());
}
/// <summary>
/// Creates a single fragment for the <c>ffl("a", "a")</c> input, first with a
/// default-constructed builder and then with a builder given custom "[" / "]" tags.
/// </summary>
public void Test1TermIndex()
{
    FieldFragList fragList = this.ffl("a", "a");

    // Default-constructed builder.
    SimpleFragmentsBuilder builder = new SimpleFragmentsBuilder();
    Assert.AreEqual("<b>a</b>", builder.CreateFragment(reader, 0, F, fragList));

    // Same input with custom pre/post tags.
    string[] preTags = new string[] { "[" };
    string[] postTags = new string[] { "]" };
    builder = new SimpleFragmentsBuilder(preTags, postTags);
    Assert.AreEqual("[a]", builder.CreateFragment(reader, 0, F, fragList));
}
/// <summary>
/// Requests up to three fragments from <c>SimpleFragmentsBuilder</c> and expects all three,
/// in the order listed below.
/// </summary>
public void Test3Frags()
{
    FieldFragList fragList = this.ffl("a c", "a b b b b b b b b b b b a b a b b b b b c a a b b");
    SimpleFragmentsBuilder builder = new SimpleFragmentsBuilder();

    String[] actual = builder.CreateFragments(reader, 0, F, fragList, 3);

    String[] expected =
    {
        "<b>a</b> b b b b b b b b b ",
        "b b <b>a</b> b <b>a</b> b b b b b ",
        "<b>c</b> <b>a</b> <b>a</b> b b"
    };
    Assert.AreEqual(3, actual.Length);
    for (int i = 0; i < expected.Length; i++)
    {
        Assert.AreEqual(expected[i], actual[i]);
    }
}
/// <summary>
/// Requests three fragments from <c>SimpleFragmentsBuilder</c> but expects only two to be
/// returned for this input.
/// </summary>
public void Test2Frags()
{
    FieldFragList fragList = this.ffl("a", "a b b b b b b b b b b b a b a b");
    SimpleFragmentsBuilder builder = new SimpleFragmentsBuilder();

    String[] snippets = builder.CreateFragments(reader, 0, F, fragList, 3);

    // 3 snippets requested, but should be 2
    Assert.AreEqual(2, snippets.Length);

    string first = "<b>a</b> b b b b b b b b b ";
    string second = "b b <b>a</b> b <b>a</b> b";
    Assert.AreEqual(first, snippets[0]);
    Assert.AreEqual(second, snippets[1]);
}
/// <summary>
/// Requests up to three fragments from <c>ScoreOrderFragmentsBuilder</c> and expects them in
/// the order listed below.
/// </summary>
public void Test3Frags()
{
    FieldFragList fragList = this.Ffl("a c", "a b b b b b b b b b b b a b a b b b b b c a a b b");
    ScoreOrderFragmentsBuilder builder = new ScoreOrderFragmentsBuilder();

    String[] actual = builder.CreateFragments(reader, 0, F, fragList, 3, null);

    Assert.AreEqual(3, actual.Length);

    // check score order
    String[] expectedInScoreOrder =
    {
        "<b>c</b> <b>a</b> <b>a</b> b b",
        "b b <b>a</b> b <b>a</b> b b b b b ",
        "<b>a</b> b b b b b b b b b "
    };
    for (int i = 0; i < expectedInScoreOrder.Length; i++)
    {
        Assert.AreEqual(expectedInScoreOrder[i], actual[i]);
    }
}
/// <summary>
/// After <c>MakeIndexLongMVB()</c>, builds a frag list for the phrase "sp pe ee ed" with a
/// fragment size of 100 and expects one fragment with the string form checked below.
/// </summary>
public void Test1PhraseLongMVB()
{
    MakeIndexLongMVB();

    // "speed" -(2gram)-> "sp","pe","ee","ed"
    FieldQuery query = new FieldQuery(PqF("sp", "pe", "ee", "ed"), true, true);
    FieldTermStack termStack = new FieldTermStack(reader, 0, F, query);
    FieldPhraseList phrases = new FieldPhraseList(termStack, query);

    SimpleFragListBuilder builder = new SimpleFragListBuilder();
    FieldFragList fragList = builder.CreateFieldFragList(phrases, 100);

    string expected = "subInfos=(sppeeeed((88,93)))/1.0(82,182)";
    Assert.AreEqual(1, fragList.fragInfos.Count);
    Assert.AreEqual(expected, fragList.fragInfos[0].ToString());
}
/// <summary>
/// After <c>MakeIndexLongMV()</c>, builds a frag list for the phrase "search engines" with a
/// fragment size of 100 and expects one fragment containing both occurrences.
/// </summary>
public void Test1PhraseLongMV()
{
    MakeIndexLongMV();

    FieldQuery query = new FieldQuery(PqF("search", "engines"), true, true);
    FieldTermStack termStack = new FieldTermStack(reader, 0, F, query);
    FieldPhraseList phrases = new FieldPhraseList(termStack, query);

    SimpleFragListBuilder builder = new SimpleFragListBuilder();
    FieldFragList fragList = builder.CreateFieldFragList(phrases, 100);

    string expected = "subInfos=(searchengines((102,116))searchengines((157,171)))/2.0(96,196)";
    Assert.AreEqual(1, fragList.fragInfos.Count);
    Assert.AreEqual(expected, fragList.fragInfos[0].ToString());
}
/// <summary>
/// After <c>MakeIndexShortMV()</c>, builds a frag list for the term "d" with a fragment size
/// of 100 and expects one fragment with the string form checked below.
/// </summary>
public void Test1PhraseShortMV()
{
    MakeIndexShortMV();

    FieldQuery query = new FieldQuery(Tq("d"), true, true);
    FieldTermStack termStack = new FieldTermStack(reader, 0, F, query);
    FieldPhraseList phrases = new FieldPhraseList(termStack, query);

    SimpleFragListBuilder builder = new SimpleFragListBuilder();
    FieldFragList fragList = builder.CreateFieldFragList(phrases, 100);

    string expected = "subInfos=(d((6,7)))/1.0(0,100)";
    Assert.AreEqual(1, fragList.fragInfos.Count);
    Assert.AreEqual(expected, fragList.fragInfos[0].ToString());
}
/// <summary>
/// After <c>MakeIndexShortMV()</c>, creates the single fragment for the term "d" and expects
/// the highlighted text checked below.
/// </summary>
public void Test1PhraseShortMV()
{
    MakeIndexShortMV();

    FieldQuery query = new FieldQuery(Tq("d"), true, true);
    FieldTermStack termStack = new FieldTermStack(reader, 0, F, query);
    FieldPhraseList phrases = new FieldPhraseList(termStack, query);

    SimpleFragListBuilder listBuilder = new SimpleFragListBuilder();
    FieldFragList fragList = listBuilder.CreateFieldFragList(phrases, 100);

    SimpleFragmentsBuilder fragmentsBuilder = new SimpleFragmentsBuilder();
    string expected = "a b c <b>d</b> e";
    Assert.AreEqual(expected, fragmentsBuilder.CreateFragment(reader, 0, F, fragList));
}
/// <summary>
/// After <c>MakeUnstoredIndex()</c>, expects <c>CreateFragment</c> to return null for the
/// term "aaa".
/// </summary>
public void TestUnstoredField()
{
    MakeUnstoredIndex();

    FieldQuery query = new FieldQuery(Tq("aaa"), true, true);
    FieldTermStack termStack = new FieldTermStack(reader, 0, F, query);
    FieldPhraseList phrases = new FieldPhraseList(termStack, query);

    SimpleFragListBuilder listBuilder = new SimpleFragListBuilder();
    FieldFragList fragList = listBuilder.CreateFieldFragList(phrases, 100);

    SimpleFragmentsBuilder fragmentsBuilder = new SimpleFragmentsBuilder();
    Assert.IsNull(fragmentsBuilder.CreateFragment(reader, 0, F, fragList));
}
/// <summary>
/// After <c>MakeIndexLongMVB()</c>, creates the single fragment for the phrase "sp pe ee ed"
/// and expects the highlighted text checked below.
/// </summary>
public void Test1PhraseLongMVB()
{
    MakeIndexLongMVB();

    // "speed" -(2gram)-> "sp","pe","ee","ed"
    FieldQuery query = new FieldQuery(PqF("sp", "pe", "ee", "ed"), true, true);
    FieldTermStack termStack = new FieldTermStack(reader, 0, F, query);
    FieldPhraseList phrases = new FieldPhraseList(termStack, query);

    SimpleFragListBuilder listBuilder = new SimpleFragListBuilder();
    FieldFragList fragList = listBuilder.CreateFieldFragList(phrases, 100);

    SimpleFragmentsBuilder fragmentsBuilder = new SimpleFragmentsBuilder();
    string expected = "ssing <b>speed</b>, the";
    Assert.AreEqual(expected, fragmentsBuilder.CreateFragment(reader, 0, F, fragList));
}
/// <summary>
/// After <c>MakeIndexLongMV()</c>, creates the single fragment for the phrase
/// "search engines" and expects both occurrences to be highlighted in the text checked below.
/// </summary>
public void Test1PhraseLongMV()
{
    MakeIndexLongMV();

    FieldQuery query = new FieldQuery(PqF("search", "engines"), true, true);
    FieldTermStack termStack = new FieldTermStack(reader, 0, F, query);
    FieldPhraseList phrases = new FieldPhraseList(termStack, query);

    SimpleFragListBuilder listBuilder = new SimpleFragListBuilder();
    FieldFragList fragList = listBuilder.CreateFieldFragList(phrases, 100);

    SimpleFragmentsBuilder fragmentsBuilder = new SimpleFragmentsBuilder();
    string expected = " most <b>search engines</b> use only one of these methods. Even the <b>search engines</b> that says they can use t";
    Assert.AreEqual(expected, fragmentsBuilder.CreateFragment(reader, 0, F, fragList));
}
/// <summary>
/// Runs the query "a b" against three texts with a fragment size of 20 and checks the number
/// of fragments (0, 1, 1) and, where present, the fragment's string form.
/// </summary>
public void Test2TermsQuery()
{
    SimpleFragListBuilder builder = new SimpleFragListBuilder();

    // Text "c d e": no fragment expected.
    FieldFragList fragList = builder.CreateFieldFragList(fpl("a b", "c d e"), 20);
    Assert.AreEqual(0, fragList.fragInfos.Count);

    // Text "d b c": one fragment holding only "b".
    fragList = builder.CreateFieldFragList(fpl("a b", "d b c"), 20);
    string expectedSingleTerm = "subInfos=(b((2,3)))/1.0(0,20)";
    Assert.AreEqual(1, fragList.fragInfos.Count);
    Assert.AreEqual(expectedSingleTerm, fragList.fragInfos[0].ToString());

    // Text "a b c": one fragment holding both "a" and "b".
    fragList = builder.CreateFieldFragList(fpl("a b", "a b c"), 20);
    string expectedBothTerms = "subInfos=(a((0,1))b((2,3)))/2.0(0,20)";
    Assert.AreEqual(1, fragList.fragInfos.Count);
    Assert.AreEqual(expectedBothTerms, fragList.fragInfos[0].ToString());
}
/// <summary>
/// Three inputs containing two occurrences of "a" each; every case expects both occurrences
/// to be reported in one single fragment.
/// </summary>
public void Test2TermsIndex1Frag()
{
    SimpleFragListBuilder builder = new SimpleFragListBuilder();

    // Fragment size 100, text "a a".
    FieldFragList fragList = builder.CreateFieldFragList(fpl("a", "a a"), 100);
    string expectedAdjacent = "subInfos=(a((0,1))a((2,3)))/2.0(0,100)";
    Assert.AreEqual(1, fragList.fragInfos.Count);
    Assert.AreEqual(expectedAdjacent, fragList.fragInfos[0].ToString());

    // Fragment size 20, occurrences at offsets 0 and 18.
    fragList = builder.CreateFieldFragList(fpl("a", "a b b b b b b b b a"), 20);
    string expectedFromStart = "subInfos=(a((0,1))a((18,19)))/2.0(0,20)";
    Assert.AreEqual(1, fragList.fragInfos.Count);
    Assert.AreEqual(expectedFromStart, fragList.fragInfos[0].ToString());

    // Fragment size 20, occurrences at offsets 8 and 18; the fragment is expected at (2,22).
    fragList = builder.CreateFieldFragList(fpl("a", "b b b b a b b b b a"), 20);
    string expectedShifted = "subInfos=(a((8,9))a((18,19)))/2.0(2,22)";
    Assert.AreEqual(1, fragList.fragInfos.Count);
    Assert.AreEqual(expectedShifted, fragList.fragInfos[0].ToString());
}
/// <summary>
/// Builds up to <paramref name="maxNumFragments"/> fragment strings for one field of one document.
/// </summary>
/// <param name="reader">Index reader handed to <c>GetFields</c>.</param>
/// <param name="docId">Document id handed to <c>GetFields</c>.</param>
/// <param name="fieldName">Field name handed to <c>GetFields</c>.</param>
/// <param name="fieldFragList">Fragment list whose <c>fragInfos</c> are passed through <c>GetWeightedFragInfoList</c>.</param>
/// <param name="maxNumFragments">Upper bound on the number of fragments returned; must not be negative.</param>
/// <returns>
/// The fragments, in the order produced by <c>GetWeightedFragInfoList</c>, or <c>null</c> when
/// <c>GetFields</c> yields no field values. The array may be shorter than <paramref name="maxNumFragments"/>.
/// </returns>
/// <exception cref="ArgumentException">Thrown when <paramref name="maxNumFragments"/> is negative.</exception>
public virtual String[] CreateFragments(IndexReader reader, int docId, String fieldName, FieldFragList fieldFragList, int maxNumFragments)
{
    if (maxNumFragments < 0)
    {
        throw new ArgumentException("maxNumFragments(" + maxNumFragments + ") must be positive number.");
    }

    List<WeightedFragInfo> fragInfos = GetWeightedFragInfoList(fieldFragList.fragInfos);

    Field[] values = GetFields(reader, docId, fieldName);
    if (values.Length == 0)
    {
        return null;
    }

    // Size the result by what can actually be produced. Pre-allocating the raw
    // caller-supplied maximum would throw OutOfMemoryException for a large
    // "give me everything" value such as int.MaxValue.
    int fragmentCount = Math.Min(maxNumFragments, fragInfos.Count);
    List<String> fragments = new List<String>(fragmentCount);

    // Both are shared across MakeFragment calls for the whole document.
    StringBuilder buffer = new StringBuilder();
    int[] nextValueIndex = { 0 };

    for (int n = 0; n < fragmentCount; n++)
    {
        WeightedFragInfo fragInfo = fragInfos[n];
        fragments.Add(MakeFragment(buffer, nextValueIndex, values, fragInfo));
    }

    return fragments.ToArray();
}
/// <summary>
/// Three inputs whose two occurrences of "a" are too far apart for one 20-character fragment;
/// every case expects two fragments with the string forms checked below.
/// </summary>
public void Test2TermsIndex2Frags()
{
    SimpleFragListBuilder builder = new SimpleFragListBuilder();
    string expectedFirst = "subInfos=(a((0,1)))/1.0(0,20)";

    // Second occurrence at offset 28.
    FieldFragList fragList = builder.CreateFieldFragList(fpl("a", "a b b b b b b b b b b b b b a"), 20);
    Assert.AreEqual(2, fragList.fragInfos.Count);
    Assert.AreEqual(expectedFirst, fragList.fragInfos[0].ToString());
    Assert.AreEqual("subInfos=(a((28,29)))/1.0(22,42)", fragList.fragInfos[1].ToString());

    // Second occurrence at offset 26.
    fragList = builder.CreateFieldFragList(fpl("a", "a b b b b b b b b b b b b a"), 20);
    Assert.AreEqual(2, fragList.fragInfos.Count);
    Assert.AreEqual(expectedFirst, fragList.fragInfos[0].ToString());
    Assert.AreEqual("subInfos=(a((26,27)))/1.0(20,40)", fragList.fragInfos[1].ToString());

    // Second occurrence at offset 20, i.e. starting exactly at the first fragment's end.
    fragList = builder.CreateFieldFragList(fpl("a", "a b b b b b b b b b a"), 20);
    Assert.AreEqual(2, fragList.fragInfos.Count);
    Assert.AreEqual(expectedFirst, fragList.fragInfos[0].ToString());
    Assert.AreEqual("subInfos=(a((20,21)))/1.0(20,40)", fragList.fragInfos[1].ToString());
}
/// <summary>
/// Convenience wrapper that asks <c>CreateFragments</c> for at most one fragment.
/// </summary>
/// <returns>The first fragment, or <c>null</c> when <c>CreateFragments</c> returns null or an empty array.</returns>
public virtual String CreateFragment(IndexReader reader, int docId, String fieldName, FieldFragList fieldFragList)
{
    String[] single = CreateFragments(reader, docId, fieldName, fieldFragList, 1);

    bool hasFragment = single != null && single.Length != 0;
    return hasFragment ? single[0] : null;
}
/// <summary>
/// Builds up to <paramref name="maxNumFragments"/> fragment strings for one field of one document.
/// </summary>
/// <param name="reader">Index reader handed to <c>GetFields</c>.</param>
/// <param name="docId">Document id handed to <c>GetFields</c>.</param>
/// <param name="fieldName">Field name handed to <c>GetFields</c>.</param>
/// <param name="fieldFragList">Fragment list whose <c>fragInfos</c> are passed through <c>GetWeightedFragInfoList</c>.</param>
/// <param name="maxNumFragments">Upper bound on the number of fragments returned; must not be negative.</param>
/// <param name="fragCharSize">Fragment size forwarded unchanged to <c>MakeFragment</c>.</param>
/// <param name="state">State object forwarded unchanged to <c>GetFields</c> and <c>MakeFragment</c>.</param>
/// <returns>
/// The fragments, in the order produced by <c>GetWeightedFragInfoList</c>, or <c>null</c> when
/// <c>GetFields</c> yields no field values. The array may be shorter than <paramref name="maxNumFragments"/>.
/// </returns>
/// <exception cref="ArgumentException">Thrown when <paramref name="maxNumFragments"/> is negative.</exception>
public virtual string[] CreateFragments(IndexReader reader, int docId, string fieldName, FieldFragList fieldFragList, int maxNumFragments, int fragCharSize, IState state)
{
    if (maxNumFragments < 0)
    {
        throw new ArgumentException("maxNumFragments(" + maxNumFragments + ") must be positive number.");
    }

    List<WeightedFragInfo> fragInfos = GetWeightedFragInfoList(fieldFragList.fragInfos);

    Field[] values = GetFields(reader, docId, fieldName, state);
    if (values.Length == 0)
    {
        return null;
    }

    // Size the result by what can actually be produced. Pre-allocating the raw
    // caller-supplied maximum would throw OutOfMemoryException for a large
    // "give me everything" value such as int.MaxValue.
    int fragmentCount = Math.Min(maxNumFragments, fragInfos.Count);
    List<string> fragments = new List<string>(fragmentCount);

    // Both are shared across MakeFragment calls for the whole document.
    StringBuilder buffer = new StringBuilder();
    int[] nextValueIndex = { 0 };

    for (int n = 0; n < fragmentCount; n++)
    {
        WeightedFragInfo fragInfo = fragInfos[n];
        fragments.Add(MakeFragment(buffer, nextValueIndex, values, fragInfo, fragCharSize, state));
    }

    return fragments.ToArray();
}
/// <summary>
/// Convenience wrapper that asks <c>CreateFragments</c> for at most one fragment, forwarding
/// <paramref name="fragCharSize"/> and <paramref name="state"/> unchanged.
/// </summary>
/// <returns>The first fragment, or <c>null</c> when <c>CreateFragments</c> returns null or an empty array.</returns>
public virtual string CreateFragment(IndexReader reader, int docId, string fieldName, FieldFragList fieldFragList, int fragCharSize, IState state)
{
    string[] single = CreateFragments(reader, docId, fieldName, fieldFragList, 1, fragCharSize, state);

    if (single != null && single.Length != 0)
    {
        return single[0];
    }

    return null;
}
/// <summary>
/// Returns the first fragment produced by <c>CreateFragments</c> when it is asked for a
/// maximum of one fragment.
/// </summary>
/// <returns>That fragment, or <c>null</c> when <c>CreateFragments</c> returns null or an empty array.</returns>
public virtual String CreateFragment(IndexReader reader, int docId, String fieldName, FieldFragList fieldFragList)
{
    String[] result = CreateFragments(reader, docId, fieldName, fieldFragList, 1);

    bool nothingCreated = result == null || result.Length == 0;
    return nothingCreated ? null : result[0];
}
/// <summary>
/// Walks the phrases of <paramref name="fieldPhraseList"/> in list order and groups them
/// into fragments that are at least <paramref name="fragCharSize"/> characters wide.
/// </summary>
/// <param name="fieldPhraseList">Source of the phrases; its <c>phraseList</c> is enumerated once.</param>
/// <param name="fragCharSize">Requested fragment width in characters; must be at least <c>MIN_FRAG_CHAR_SIZE</c>.</param>
/// <returns>A new <c>FieldFragList</c> holding one entry per fragment that was built.</returns>
/// <exception cref="ArgumentException">Thrown when <paramref name="fragCharSize"/> is below <c>MIN_FRAG_CHAR_SIZE</c>.</exception>
public FieldFragList CreateFieldFragList(FieldPhraseList fieldPhraseList, int fragCharSize)
{
    if (fragCharSize < MIN_FRAG_CHAR_SIZE)
    {
        throw new ArgumentException("fragCharSize(" + fragCharSize + ") is too small. It must be " + MIN_FRAG_CHAR_SIZE + " or higher.");
    }

    FieldFragList ffl = new FieldFragList(fragCharSize);
    List<WeightedPhraseInfo> wpil = new List<WeightedPhraseInfo>();
    LinkedList<WeightedPhraseInfo>.Enumerator ite = fieldPhraseList.phraseList.GetEnumerator();
    WeightedPhraseInfo phraseInfo = null;
    int startOffset = 0;

    // Set only when phraseInfo was read from the enumerator but did not fit into the
    // fragment being built, so the next outer pass must start from it instead of
    // advancing the enumerator again.
    bool taken = false;

    while (true)
    {
        if (!taken)
        {
            if (!ite.MoveNext())
            {
                break;
            }
            phraseInfo = ite.Current;
        }
        taken = false;
        if (phraseInfo == null)
        {
            break;
        }

        // if the phrase violates the border of previous fragment, discard it and try next phrase
        if (phraseInfo.StartOffset < startOffset)
        {
            if (phraseInfo.EndOffset < startOffset)
            {
                continue;
            }
            // The phrase overlaps the previous border: the next fragment starts at the phrase.
            startOffset = phraseInfo.StartOffset;
        }

        wpil.Clear();
        wpil.Add(phraseInfo);

        // Lead in by MARGIN characters unless that would cross the current border.
        int st = phraseInfo.StartOffset - MARGIN < startOffset ? startOffset : phraseInfo.StartOffset - MARGIN;
        int en = st + fragCharSize;
        if (phraseInfo.EndOffset > en)
        {
            // Widen the fragment so that an over-long phrase is not cut.
            en = phraseInfo.EndOffset;
        }
        startOffset = en;

        // Collect the following phrases that end within this fragment.
        while (ite.MoveNext())
        {
            phraseInfo = ite.Current;
            if (phraseInfo == null || phraseInfo.EndOffset > en)
            {
                // Not consumed here; the outer loop deals with it (or stops on null).
                // The flag is deliberately NOT set for phrases that were added: leaving it
                // set when the enumerator ran out made a phrase ending exactly at 'en'
                // show up again in a second, duplicate fragment.
                taken = true;
                break;
            }
            wpil.Add(phraseInfo);
        }

        ffl.Add(st, en, wpil);
    }

    return ffl;
}
/// <summary>
/// Returns the best fragments for one field of one document.
/// </summary>
/// <param name="fieldQuery">FieldQuery object</param>
/// <param name="reader">IndexReader of the index</param>
/// <param name="docId">document id to be highlighted</param>
/// <param name="fieldName">field of the document to be highlighted</param>
/// <param name="fragCharSize">the length (number of chars) of a fragment</param>
/// <param name="maxNumFragments">maximum number of fragments</param>
/// <param name="state">state object forwarded unchanged to <c>GetFieldFragList</c> and to the fragments builder</param>
/// <returns>
/// Whatever <c>fragmentsBuilder.CreateFragments</c> returns: the created fragments, or null when no
/// fragments were created. The array can hold fewer than <paramref name="maxNumFragments"/> entries.
/// </returns>
public string[] GetBestFragments(FieldQuery fieldQuery, IndexReader reader, int docId, string fieldName, int fragCharSize, int maxNumFragments, IState state)
{
    // Step 1: build the fragment list for this query and field.
    FieldFragList fragList = GetFieldFragList(fieldQuery, reader, docId, fieldName, fragCharSize, state);

    // Step 2: let the configured builder turn the fragment list into strings.
    string[] bestFragments = fragmentsBuilder.CreateFragments(reader, docId, fieldName, fragList, maxNumFragments, fragCharSize, state);
    return bestFragments;
}