/// <summary>
/// Walks the phrases of <paramref name="fieldPhraseList"/> in list order and groups them into
/// fragments that span at least <paramref name="fragCharSize"/> characters each.
/// </summary>
/// <param name="fieldPhraseList">Supplies the weighted phrases through its <c>phraseList</c>.</param>
/// <param name="fragCharSize">Requested fragment length in characters.</param>
/// <returns>The fragment list that every built fragment was added to.</returns>
/// <exception cref="ArgumentException">
/// <paramref name="fragCharSize"/> is smaller than <c>MIN_FRAG_CHAR_SIZE</c>.
/// </exception>
public FieldFragList CreateFieldFragList(FieldPhraseList fieldPhraseList, int fragCharSize)
{
    if (fragCharSize < MIN_FRAG_CHAR_SIZE)
    {
        throw new ArgumentException("fragCharSize(" + fragCharSize + ") is too small. It must be " + MIN_FRAG_CHAR_SIZE + " or higher.");
    }

    FieldFragList ffl = new FieldFragList(fragCharSize);
    List<WeightedPhraseInfo> wpil = new List<WeightedPhraseInfo>();
    LinkedList<WeightedPhraseInfo>.Enumerator ite = fieldPhraseList.phraseList.GetEnumerator();
    WeightedPhraseInfo phraseInfo = null;
    int startOffset = 0;
    // True only while phraseInfo holds an element that was read ahead but not yet placed
    // in a fragment (or a null element that must stop the outer loop).
    bool taken = false;
    while (true)
    {
        if (!taken)
        {
            if (!ite.MoveNext())
            {
                break;
            }
            phraseInfo = ite.Current;
        }
        taken = false;
        if (phraseInfo == null)
        {
            break;
        }

        // if the phrase violates the border of previous fragment, discard it and try next phrase
        if (phraseInfo.StartOffset < startOffset)
        {
            if (phraseInfo.EndOffset < startOffset)
            {
                continue;
            }
            // The phrase starts before the border but does not end before it:
            // begin the next fragment at the phrase itself.
            startOffset = phraseInfo.StartOffset;
        }

        wpil.Clear();
        wpil.Add(phraseInfo);
        int st = phraseInfo.StartOffset - MARGIN < startOffset ? startOffset : phraseInfo.StartOffset - MARGIN;
        int en = st + fragCharSize;
        if (phraseInfo.EndOffset > en)
        {
            en = phraseInfo.EndOffset;
        }
        startOffset = en;

        // Pull in every following phrase that still ends inside this fragment. Only an element
        // that was read but NOT added is handed back to the outer loop; previously a phrase that
        // had already been added stayed flagged as pending when the enumerator ran out, and a
        // phrase ending exactly at 'en' was then emitted a second time as its own fragment.
        while (ite.MoveNext())
        {
            WeightedPhraseInfo next = ite.Current;
            if (next == null || next.EndOffset > en)
            {
                phraseInfo = next;
                taken = true;
                break;
            }
            wpil.Add(next);
        }
        ffl.Add(st, en, wpil);
    }
    return ffl;
}
/// <summary>
/// Builds the term stack and the phrase list (capped by <c>phraseLimit</c>) for one field of one
/// document, then lets <c>fragListBuilder</c> turn the phrase list into a fragment list.
/// </summary>
/// <param name="fieldQuery">Query passed to both the term stack and the phrase list.</param>
/// <param name="reader">Index reader passed to the term stack.</param>
/// <param name="docId">Document id passed to the term stack.</param>
/// <param name="fieldName">Field name passed to the term stack.</param>
/// <param name="fragCharSize">Fragment size forwarded to the fragment list builder.</param>
/// <param name="state">State object forwarded to the term stack constructor.</param>
/// <returns>The fragment list produced by <c>fragListBuilder</c>.</returns>
private FieldFragList GetFieldFragList(FieldQuery fieldQuery, IndexReader reader, int docId, String fieldName, int fragCharSize, IState state)
{
    var termStack = new FieldTermStack(reader, docId, fieldName, fieldQuery, state);
    var phrases = new FieldPhraseList(termStack, fieldQuery, phraseLimit);
    FieldFragList fragList = fragListBuilder.CreateFieldFragList(phrases, fragCharSize);
    return fragList;
}
/// <summary>
/// Test helper: indexes <paramref name="indexValue"/> through <c>Make1d1fIndex</c>, parses
/// <paramref name="queryValue"/> with <c>paW</c>, and builds the fragment list for document 0 of
/// field <c>F</c> using a <c>SimpleFragListBuilder</c> and a fragment size of 20.
/// </summary>
/// <param name="queryValue">Query text handed to <c>paW.Parse</c>.</param>
/// <param name="indexValue">Text handed to <c>Make1d1fIndex</c>.</param>
/// <returns>The fragment list created by the builder.</returns>
private FieldFragList Ffl(String queryValue, String indexValue)
{
    Make1d1fIndex(indexValue);

    Query parsed = paW.Parse(queryValue);
    FieldQuery fieldQuery = new FieldQuery(parsed, true, true);
    FieldTermStack termStack = new FieldTermStack(reader, 0, F, fieldQuery);
    FieldPhraseList phrases = new FieldPhraseList(termStack, fieldQuery);

    SimpleFragListBuilder builder = new SimpleFragListBuilder();
    return builder.CreateFieldFragList(phrases, 20);
}
/// <summary>
/// Test helper that runs the whole pipeline for one query/index pair: index the text with
/// <c>Make1d1fIndex</c>, parse the query with <c>paW</c>, build the term stack and phrase list
/// for document 0 of field <c>F</c>, and create the fragment list with a fragment size of 20.
/// </summary>
/// <param name="queryValue">Query text handed to <c>paW.Parse</c>.</param>
/// <param name="indexValue">Text handed to <c>Make1d1fIndex</c>.</param>
/// <returns>The fragment list created by a new <c>SimpleFragListBuilder</c>.</returns>
private FieldFragList Ffl(String queryValue, String indexValue)
{
    Make1d1fIndex(indexValue);

    Query parsedQuery = paW.Parse(queryValue);
    FieldQuery fieldQuery = new FieldQuery(parsedQuery, true, true);
    FieldTermStack terms = new FieldTermStack(reader, 0, F, fieldQuery);
    FieldPhraseList phraseList = new FieldPhraseList(terms, fieldQuery);

    FieldFragList result = new SimpleFragListBuilder().CreateFieldFragList(phraseList, 20);
    return result;
}
/// <summary>
/// Checks that the query built by <c>Tq("d")</c>, run against the index created by
/// <c>MakeIndexShortMV</c>, yields exactly one phrase rendered as "d(1.0)((6,7))".
/// </summary>
public void Test1PhraseShortMV()
{
    MakeIndexShortMV();

    var fieldQuery = new FieldQuery(Tq("d"), true, true);
    var termStack = new FieldTermStack(reader, 0, F, fieldQuery);
    var phrases = new FieldPhraseList(termStack, fieldQuery);

    Assert.AreEqual(1, phrases.phraseList.Count);
    var head = phrases.phraseList.First;
    Assert.AreEqual("d(1.0)((6,7))", head.Value.ToString());
}
/// <summary>
/// Checks that the four-term phrase query "sp","pe","ee","ed", run against the index created by
/// <c>MakeIndexLongMVB</c>, yields exactly one phrase rendered as "sppeeeed(1.0)((88,93))".
/// </summary>
public void Test1PhraseLongMVB()
{
    MakeIndexLongMVB();

    // "speed" -(2gram)-> "sp","pe","ee","ed"
    var fieldQuery = new FieldQuery(PqF("sp", "pe", "ee", "ed"), true, true);
    var termStack = new FieldTermStack(reader, 0, F, fieldQuery);
    var phrases = new FieldPhraseList(termStack, fieldQuery);

    Assert.AreEqual(1, phrases.phraseList.Count);
    var head = phrases.phraseList.First;
    Assert.AreEqual("sppeeeed(1.0)((88,93))", head.Value.ToString());
}
/// <summary>
/// Indexes "a a" and checks that the query built by <c>Tq("a")</c> produces two phrases,
/// one per occurrence, at offsets (0,1) and (2,3).
/// </summary>
public void Test2TermsIndex()
{
    Make1d1fIndex("a a");

    var fieldQuery = new FieldQuery(Tq("a"), true, true);
    var termStack = new FieldTermStack(reader, 0, F, fieldQuery);
    var phrases = new FieldPhraseList(termStack, fieldQuery);

    Assert.AreEqual(2, phrases.phraseList.Count);
    var first = phrases.phraseList.First;
    Assert.AreEqual("a(1.0)((0,1))", first.Value.ToString());
    Assert.AreEqual("a(1.0)((2,3))", first.Next.Value.ToString());
}
/// <summary>
/// Indexes "d a b a b c d" and checks that the phrase query "a","b","c" produces a single
/// phrase rendered as "abc(1.0)((6,11))".
/// </summary>
public void Test3TermsPhrase()
{
    Make1d1fIndex("d a b a b c d");

    var fieldQuery = new FieldQuery(PqF("a", "b", "c"), true, true);
    var termStack = new FieldTermStack(reader, 0, F, fieldQuery);
    var phrases = new FieldPhraseList(termStack, fieldQuery);

    Assert.AreEqual(1, phrases.phraseList.Count);
    var head = phrases.phraseList.First;
    Assert.AreEqual("abc(1.0)((6,11))", head.Value.ToString());
}
/// <summary>
/// Checks that the phrase query "search","engines", run against the index created by
/// <c>MakeIndexLongMV</c>, produces two phrases at offsets (102,116) and (157,171).
/// </summary>
public void Test1PhraseLongMV()
{
    MakeIndexLongMV();

    var fieldQuery = new FieldQuery(PqF("search", "engines"), true, true);
    var termStack = new FieldTermStack(reader, 0, F, fieldQuery);
    var phrases = new FieldPhraseList(termStack, fieldQuery);

    Assert.AreEqual(2, phrases.phraseList.Count);
    var first = phrases.phraseList.First;
    Assert.AreEqual("searchengines(1.0)((102,116))", first.Value.ToString());
    Assert.AreEqual("searchengines(1.0)((157,171))", first.Next.Value.ToString());
}
/// <summary>
/// Checks that the query built by <c>Preq("engin")</c>, run against the index created by
/// <c>MakeIndexLongMV</c>, produces two "engines" phrases at offsets (109,116) and (164,171).
/// </summary>
public void TestCompleteSearchInLongMV()
{
    MakeIndexLongMV();

    var fieldQuery = new FieldQuery(Preq("engin"), true, true);
    var termStack = new FieldTermStack(reader, 0, F, fieldQuery);
    var phrases = new FieldPhraseList(termStack, fieldQuery);

    Assert.AreEqual(2, phrases.phraseList.Count);
    var first = phrases.phraseList.First;
    Assert.AreEqual("engines(1.0)((109,116))", first.Value.ToString());
    Assert.AreEqual("engines(1.0)((164,171))", first.Next.Value.ToString());
}
/// <summary>
/// Indexes "c a a b c" and checks that the query built by <c>pqF(2F, 1, "a", "c")</c> produces
/// one phrase rendered as "ac(2.0)((4,5)(8,9))" whose start offset is 4 and end offset is 9.
/// </summary>
public void TestPhraseSlop()
{
    Make1d1fIndex("c a a b c");

    var fieldQuery = new FieldQuery(pqF(2F, 1, "a", "c"), true, true);
    var termStack = new FieldTermStack(reader, 0, F, fieldQuery);
    var phrases = new FieldPhraseList(termStack, fieldQuery);

    Assert.AreEqual(1, phrases.phraseList.Count);
    var head = phrases.phraseList.First;
    Assert.AreEqual("ac(2.0)((4,5)(8,9))", head.Value.ToString());
    Assert.AreEqual(4, head.Value.StartOffset);
    Assert.AreEqual(9, head.Value.EndOffset);
}
/// <summary>
/// Checks that, with a fragment size of 100, both "search engines" phrases found in the index
/// created by <c>MakeIndexLongMV</c> are grouped into one fragment spanning (96,196).
/// </summary>
public void Test1PhraseLongMV()
{
    MakeIndexLongMV();

    var fieldQuery = new FieldQuery(PqF("search", "engines"), true, true);
    var termStack = new FieldTermStack(reader, 0, F, fieldQuery);
    var phrases = new FieldPhraseList(termStack, fieldQuery);
    var builder = new SimpleFragListBuilder();
    FieldFragList fragList = builder.CreateFieldFragList(phrases, 100);

    Assert.AreEqual(1, fragList.fragInfos.Count);
    var onlyFrag = fragList.fragInfos[0];
    Assert.AreEqual("subInfos=(searchengines((102,116))searchengines((157,171)))/2.0(96,196)", onlyFrag.ToString());
}
/// <summary>
/// Checks the fragment text that <c>SimpleFragmentsBuilder</c> creates for the query built by
/// <c>Tq("d")</c> against the index created by <c>MakeIndexShortMV</c>, with a fragment size of 100.
/// </summary>
public void Test1PhraseShortMV()
{
    MakeIndexShortMV();

    var fieldQuery = new FieldQuery(Tq("d"), true, true);
    var termStack = new FieldTermStack(reader, 0, F, fieldQuery);
    var phrases = new FieldPhraseList(termStack, fieldQuery);
    var fragListBuilder = new SimpleFragListBuilder();
    FieldFragList fragList = fragListBuilder.CreateFieldFragList(phrases, 100);
    var fragmentsBuilder = new SimpleFragmentsBuilder();

    Assert.AreEqual("a b c <b>d</b> e", fragmentsBuilder.CreateFragment(reader, 0, F, fragList));
}
/// <summary>
/// Checks the fragment text that <c>SimpleFragmentsBuilder</c> creates for the four-term phrase
/// query "sp","pe","ee","ed" against the index created by <c>MakeIndexLongMVB</c>, with a
/// fragment size of 100.
/// </summary>
public void Test1PhraseLongMVB()
{
    MakeIndexLongMVB();

    // "speed" -(2gram)-> "sp","pe","ee","ed"
    var fieldQuery = new FieldQuery(PqF("sp", "pe", "ee", "ed"), true, true);
    var termStack = new FieldTermStack(reader, 0, F, fieldQuery);
    var phrases = new FieldPhraseList(termStack, fieldQuery);
    var fragListBuilder = new SimpleFragListBuilder();
    FieldFragList fragList = fragListBuilder.CreateFieldFragList(phrases, 100);
    var fragmentsBuilder = new SimpleFragmentsBuilder();

    Assert.AreEqual("ssing <b>speed</b>, the", fragmentsBuilder.CreateFragment(reader, 0, F, fragList));
}
/// <summary>
/// Checks that <c>SimpleFragmentsBuilder.CreateFragment</c> returns null for the query built by
/// <c>Tq("aaa")</c> when the index comes from <c>MakeUnstoredIndex</c>.
/// </summary>
public void TestUnstoredField()
{
    MakeUnstoredIndex();

    var fieldQuery = new FieldQuery(Tq("aaa"), true, true);
    var termStack = new FieldTermStack(reader, 0, F, fieldQuery);
    var phrases = new FieldPhraseList(termStack, fieldQuery);
    var fragListBuilder = new SimpleFragListBuilder();
    FieldFragList fragList = fragListBuilder.CreateFieldFragList(phrases, 100);
    var fragmentsBuilder = new SimpleFragmentsBuilder();

    Assert.IsNull(fragmentsBuilder.CreateFragment(reader, 0, F, fragList));
}
/// <summary>
/// Checks that, with a fragment size of 100, the single "sppeeeed" phrase found in the index
/// created by <c>MakeIndexLongMVB</c> ends up in one fragment spanning (82,182).
/// </summary>
public void Test1PhraseLongMVB()
{
    MakeIndexLongMVB();

    // "speed" -(2gram)-> "sp","pe","ee","ed"
    var fieldQuery = new FieldQuery(PqF("sp", "pe", "ee", "ed"), true, true);
    var termStack = new FieldTermStack(reader, 0, F, fieldQuery);
    var phrases = new FieldPhraseList(termStack, fieldQuery);
    var builder = new SimpleFragListBuilder();
    FieldFragList fragList = builder.CreateFieldFragList(phrases, 100);

    Assert.AreEqual(1, fragList.fragInfos.Count);
    var onlyFrag = fragList.fragInfos[0];
    Assert.AreEqual("subInfos=(sppeeeed((88,93)))/1.0(82,182)", onlyFrag.ToString());
}
/// <summary>
/// Checks that the phrase query "personal","computer", run against the index created by
/// <c>MakeIndex1w2w</c>, produces one phrase rendered as "personalcomputer(1.0)((3,5))" with
/// start offset 3 and end offset 5.
/// </summary>
public void TestFieldPhraseListIndex1w2wSearch1phrase()
{
    MakeIndex1w2w();

    var fieldQuery = new FieldQuery(PqF("personal", "computer"), true, true);
    var termStack = new FieldTermStack(reader, 0, F, fieldQuery);
    var phrases = new FieldPhraseList(termStack, fieldQuery);

    Assert.AreEqual(1, phrases.phraseList.Count);
    var head = phrases.phraseList.First;
    Assert.AreEqual("personalcomputer(1.0)((3,5))", head.Value.ToString());
    Assert.AreEqual(3, head.Value.StartOffset);
    Assert.AreEqual(5, head.Value.EndOffset);
}
/// <summary>
/// Checks that the query built by <c>Tq("computer")</c>, run against the index created by
/// <c>MakeIndex2w1w</c>, produces one phrase rendered as "computer(1.0)((3,20))" with start
/// offset 3 and end offset 20.
/// </summary>
public void TestFieldPhraseListIndex2w1wSearch1partial()
{
    MakeIndex2w1w();

    var fieldQuery = new FieldQuery(Tq("computer"), true, true);
    var termStack = new FieldTermStack(reader, 0, F, fieldQuery);
    var phrases = new FieldPhraseList(termStack, fieldQuery);

    Assert.AreEqual(1, phrases.phraseList.Count);
    var head = phrases.phraseList.First;
    Assert.AreEqual("computer(1.0)((3,20))", head.Value.ToString());
    Assert.AreEqual(3, head.Value.StartOffset);
    Assert.AreEqual(20, head.Value.EndOffset);
}
/// <summary>
/// Checks that, with a fragment size of 100, the single "d" phrase found in the index created
/// by <c>MakeIndexShortMV</c> ends up in one fragment spanning (0,100).
/// </summary>
public void Test1PhraseShortMV()
{
    MakeIndexShortMV();

    var fieldQuery = new FieldQuery(Tq("d"), true, true);
    var termStack = new FieldTermStack(reader, 0, F, fieldQuery);
    var phrases = new FieldPhraseList(termStack, fieldQuery);
    var builder = new SimpleFragListBuilder();
    FieldFragList fragList = builder.CreateFieldFragList(phrases, 100);

    Assert.AreEqual(1, fragList.fragInfos.Count);
    var onlyFrag = fragList.fragInfos[0];
    Assert.AreEqual("subInfos=(d((6,7)))/1.0(0,100)", onlyFrag.ToString());
}
/// <summary>
/// Checks the fragment text that <c>SimpleFragmentsBuilder</c> creates for the phrase query
/// "search","engines" against the index created by <c>MakeIndexLongMV</c>, with a fragment
/// size of 100.
/// </summary>
public void Test1PhraseLongMV()
{
    MakeIndexLongMV();

    var fieldQuery = new FieldQuery(PqF("search", "engines"), true, true);
    var termStack = new FieldTermStack(reader, 0, F, fieldQuery);
    var phrases = new FieldPhraseList(termStack, fieldQuery);
    var fragListBuilder = new SimpleFragListBuilder();
    FieldFragList fragList = fragListBuilder.CreateFieldFragList(phrases, 100);
    var fragmentsBuilder = new SimpleFragmentsBuilder();

    Assert.AreEqual(" most <b>search engines</b> use only one of these methods. Even the <b>search engines</b> that says they can use t", fragmentsBuilder.CreateFragment(reader, 0, F, fragList));
}
/// <summary>
/// Indexes "bbbacbabc" through <c>Make1d1fIndexB</c> and checks that the phrase query "ba","ac"
/// produces one phrase rendered as "baac(1.0)((2,5))".
/// </summary>
public void Test1PhraseIndexB()
{
    // 01 12 23 34 45 56 67 78 (offsets)
    // bb|bb|ba|ac|cb|ba|ab|bc
    //  0  1  2  3  4  5  6  7 (positions)
    Make1d1fIndexB("bbbacbabc");

    var fieldQuery = new FieldQuery(PqF("ba", "ac"), true, true);
    var termStack = new FieldTermStack(reader, 0, F, fieldQuery);
    var phrases = new FieldPhraseList(termStack, fieldQuery);

    Assert.AreEqual(1, phrases.phraseList.Count);
    var head = phrases.phraseList.First;
    Assert.AreEqual("baac(1.0)((2,5))", head.Value.ToString());
}
/// <summary>
/// Indexes "abab" through <c>Make1d1fIndexB</c> and checks that the query built by
/// <c>Tq("ab")</c> produces two phrases at offsets (0,2) and (2,4).
/// </summary>
public void Test2ConcatTermsIndexB()
{
    // 01 12 23 (offsets)
    // ab|ba|ab
    //  0  1  2 (positions)
    Make1d1fIndexB("abab");

    var fieldQuery = new FieldQuery(Tq("ab"), true, true);
    var termStack = new FieldTermStack(reader, 0, F, fieldQuery);
    var phrases = new FieldPhraseList(termStack, fieldQuery);

    Assert.AreEqual(2, phrases.phraseList.Count);
    var first = phrases.phraseList.First;
    Assert.AreEqual("ab(1.0)((0,2))", first.Value.ToString());
    Assert.AreEqual("ab(1.0)((2,4))", first.Next.Value.ToString());
}
/// <summary>
/// Indexes "d a b c d" and checks that two optional phrase clauses, "a","b" and "b","c",
/// produce a single phrase rendered as "abc(1.0)((2,7))".
/// </summary>
public void Test2PhrasesOverlap()
{
    Make1d1fIndex("d a b c d");

    BooleanQuery either = new BooleanQuery();
    either.Add(PqF("a", "b"), Occur.SHOULD);
    either.Add(PqF("b", "c"), Occur.SHOULD);

    var fieldQuery = new FieldQuery(either, true, true);
    var termStack = new FieldTermStack(reader, 0, F, fieldQuery);
    var phrases = new FieldPhraseList(termStack, fieldQuery);

    Assert.AreEqual(1, phrases.phraseList.Count);
    var head = phrases.phraseList.First;
    Assert.AreEqual("abc(1.0)((2,7))", head.Value.ToString());
}
/// <summary>
/// Indexes "a b" and checks two queries against it: the phrase query "a","b" must produce
/// "ab(1.0)((0,3))", and the query built by <c>Tq("b")</c> must produce "b(1.0)((2,3))".
/// </summary>
public void Test1PhraseIndex()
{
    Make1d1fIndex("a b");

    // Phrase query over both terms.
    var phraseQuery = new FieldQuery(PqF("a", "b"), true, true);
    var phraseStack = new FieldTermStack(reader, 0, F, phraseQuery);
    var phraseResult = new FieldPhraseList(phraseStack, phraseQuery);
    Assert.AreEqual(1, phraseResult.phraseList.Count);
    Assert.AreEqual("ab(1.0)((0,3))", phraseResult.phraseList.First.Value.ToString());

    // Single-term query against the same index.
    var termQuery = new FieldQuery(Tq("b"), true, true);
    var termStack = new FieldTermStack(reader, 0, F, termQuery);
    var termResult = new FieldPhraseList(termStack, termQuery);
    Assert.AreEqual(1, termResult.phraseList.Count);
    Assert.AreEqual("b(1.0)((2,3))", termResult.phraseList.First.Value.ToString());
}
/// <summary>
/// Indexes "d a b d c a b c" and checks that, with the optional phrase clauses "a","b" and
/// "a","b","c", the phrase list contains "ab" at (2,5) followed by "abc" at (10,15).
/// </summary>
public void TestSearchLongestPhrase()
{
    Make1d1fIndex("d a b d c a b c");

    BooleanQuery either = new BooleanQuery();
    either.Add(PqF("a", "b"), Occur.SHOULD);
    either.Add(PqF("a", "b", "c"), Occur.SHOULD);

    var fieldQuery = new FieldQuery(either, true, true);
    var termStack = new FieldTermStack(reader, 0, F, fieldQuery);
    var phrases = new FieldPhraseList(termStack, fieldQuery);

    Assert.AreEqual(2, phrases.phraseList.Count);
    var first = phrases.phraseList.First;
    Assert.AreEqual("ab(1.0)((2,5))", first.Value.ToString());
    Assert.AreEqual("abc(1.0)((10,15))", first.Next.Value.ToString());
}
/// <summary>
/// Checks that an optional term clause built by <c>Tq("pc")</c> combined with the optional phrase
/// clause "personal","computer", run against the index created by <c>MakeIndex2w1w</c>, produces
/// one phrase with start offset 3 and end offset 20.
/// </summary>
public void TestFieldPhraseListIndex2w1wSearch1term1phrase()
{
    MakeIndex2w1w();

    BooleanQuery bq = new BooleanQuery();
    bq.Add(Tq("pc"), Occur.SHOULD);
    bq.Add(PqF("personal", "computer"), Occur.SHOULD);

    FieldQuery fq = new FieldQuery(bq, true, true);
    FieldTermStack stack = new FieldTermStack(reader, 0, F, fq);
    FieldPhraseList fpl = new FieldPhraseList(stack, fq);

    Assert.AreEqual(1, fpl.phraseList.Count);

    // Only the weight/offset suffix is pinned; "> 0" requires some text in front of it.
    // The search is ordinal so the literal match does not depend on the current culture.
    string rendered = fpl.phraseList.First.Value.ToString();
    Assert.IsTrue(rendered.IndexOf("(1.0)((3,20))", System.StringComparison.Ordinal) > 0);
    Assert.AreEqual(3, fpl.phraseList.First.Value.StartOffset);
    Assert.AreEqual(20, fpl.phraseList.First.Value.EndOffset);
}
/// <summary>
/// Indexes "c a a b" and runs the phrase query "a","b" twice: with the first boolean
/// constructor argument true it must yield only "ab" at (4,7); with it false it must yield
/// "a" at (2,3) followed by "ab" at (4,7).
/// </summary>
public void Test2Terms1PhraseIndex()
{
    Make1d1fIndex("c a a b");

    // phraseHighlight = true
    var strictQuery = new FieldQuery(PqF("a", "b"), true, true);
    var strictStack = new FieldTermStack(reader, 0, F, strictQuery);
    var strictPhrases = new FieldPhraseList(strictStack, strictQuery);
    Assert.AreEqual(1, strictPhrases.phraseList.Count);
    Assert.AreEqual("ab(1.0)((4,7))", strictPhrases.phraseList.First.Value.ToString());

    // phraseHighlight = false
    var looseQuery = new FieldQuery(PqF("a", "b"), false, true);
    var looseStack = new FieldTermStack(reader, 0, F, looseQuery);
    var loosePhrases = new FieldPhraseList(looseStack, looseQuery);
    Assert.AreEqual(2, loosePhrases.phraseList.Count);
    var first = loosePhrases.phraseList.First;
    Assert.AreEqual("a(1.0)((2,3))", first.Value.ToString());
    Assert.AreEqual("ab(1.0)((4,7))", first.Next.Value.ToString());
}
/// <summary>
/// Indexes "c a a b c" and checks that the query built by <c>pqF(2F, 1, "a", "c")</c> produces
/// one phrase rendered as "ac(2.0)((4,5)(8,9))" whose <c>GetStartOffset()</c> is 4 and whose
/// <c>GetEndOffset()</c> is 9.
/// </summary>
public void TestPhraseSlop()
{
    Make1d1fIndex("c a a b c");

    var fieldQuery = new FieldQuery(pqF(2F, 1, "a", "c"), true, true);
    var termStack = new FieldTermStack(reader, 0, F, fieldQuery);
    var phrases = new FieldPhraseList(termStack, fieldQuery);

    Assert.AreEqual(1, phrases.phraseList.Count);
    var head = phrases.phraseList.First;
    Assert.AreEqual("ac(2.0)((4,5)(8,9))", head.Value.ToString());
    Assert.AreEqual(4, head.Value.GetStartOffset());
    Assert.AreEqual(9, head.Value.GetEndOffset());
}
/// <summary>
/// Builds a fragment list from the phrases of <paramref name="fieldPhraseList"/>: each fragment
/// opens at (or up to <c>MARGIN</c> characters before) a phrase, spans at least
/// <paramref name="fragCharSize"/> characters, and collects every following phrase that ends
/// inside it.
/// </summary>
/// <param name="fieldPhraseList">Supplies the weighted phrases through its <c>phraseList</c>.</param>
/// <param name="fragCharSize">Requested fragment length in characters.</param>
/// <returns>The fragment list that every built fragment was added to.</returns>
/// <exception cref="ArgumentException">
/// <paramref name="fragCharSize"/> is smaller than <c>MIN_FRAG_CHAR_SIZE</c>.
/// </exception>
public FieldFragList CreateFieldFragList(FieldPhraseList fieldPhraseList, int fragCharSize)
{
    if (fragCharSize < MIN_FRAG_CHAR_SIZE)
    {
        throw new ArgumentException("fragCharSize(" + fragCharSize + ") is too small. It must be " + MIN_FRAG_CHAR_SIZE + " or higher.");
    }

    FieldFragList ffl = new FieldFragList(fragCharSize);
    List<WeightedPhraseInfo> wpil = new List<WeightedPhraseInfo>();
    LinkedList<WeightedPhraseInfo>.Enumerator ite = fieldPhraseList.phraseList.GetEnumerator();
    WeightedPhraseInfo phraseInfo = null;
    int startOffset = 0;
    // Set only when phraseInfo carries an element read by the inner loop that has not been
    // placed in a fragment yet (or a null element that must terminate the outer loop).
    bool taken = false;
    while (true)
    {
        if (!taken)
        {
            if (!ite.MoveNext())
            {
                break;
            }
            phraseInfo = ite.Current;
        }
        taken = false;
        if (phraseInfo == null)
        {
            break;
        }

        // if the phrase violates the border of previous fragment, discard it and try next phrase
        if (phraseInfo.StartOffset < startOffset)
        {
            if (phraseInfo.EndOffset < startOffset)
            {
                continue;
            }
            // Phrase begins before the border without ending before it: start the next
            // fragment at the phrase.
            startOffset = phraseInfo.StartOffset;
        }

        wpil.Clear();
        wpil.Add(phraseInfo);
        int st = phraseInfo.StartOffset - MARGIN < startOffset ? startOffset : phraseInfo.StartOffset - MARGIN;
        int en = st + fragCharSize;
        if (phraseInfo.EndOffset > en)
        {
            en = phraseInfo.EndOffset;
        }
        startOffset = en;

        // Absorb following phrases that end within the fragment. The pending flag is raised only
        // for an element that was read but not added; before this fix it stayed raised for the
        // last added phrase once the enumerator was exhausted, so a phrase ending exactly at
        // 'en' was re-processed and emitted again as a separate fragment.
        while (ite.MoveNext())
        {
            WeightedPhraseInfo candidate = ite.Current;
            if (candidate == null || candidate.EndOffset > en)
            {
                phraseInfo = candidate;
                taken = true;
                break;
            }
            wpil.Add(candidate);
        }
        ffl.Add(st, en, wpil);
    }
    return ffl;
}
/// <summary>
/// Builds the term stack and the phrase list (capped by <c>phraseLimit</c>) for one field of one
/// document, then asks <c>fragListBuilder</c> to create the fragment list from the phrase list.
/// </summary>
/// <param name="fieldQuery">Query passed to both the term stack and the phrase list.</param>
/// <param name="reader">Index reader passed to the term stack.</param>
/// <param name="docId">Document id passed to the term stack.</param>
/// <param name="fieldName">Field name passed to the term stack.</param>
/// <param name="fragCharSize">Fragment size forwarded to the fragment list builder.</param>
/// <returns>The fragment list produced by <c>fragListBuilder</c>.</returns>
private FieldFragList GetFieldFragList(FieldQuery fieldQuery, IndexReader reader, int docId, String fieldName, int fragCharSize)
{
    var termStack = new FieldTermStack(reader, docId, fieldName, fieldQuery);
    var phrases = new FieldPhraseList(termStack, fieldQuery, phraseLimit);
    FieldFragList fragList = fragListBuilder.CreateFieldFragList(phrases, fragCharSize);
    return fragList;
}
/// <summary>
/// Indexes "d a b d c a b c" and checks that, with the optional phrase clauses "a","b" and
/// "a","b","c", the phrase list contains "ab" at (2,5) followed by "abc" at (10,15).
/// </summary>
public void TestSearchLongestPhrase()
{
    Make1d1fIndex("d a b d c a b c");

    BooleanQuery either = new BooleanQuery();
    either.Add(PqF("a", "b"), Lucene.Net.Search.BooleanClause.Occur.SHOULD);
    either.Add(PqF("a", "b", "c"), Lucene.Net.Search.BooleanClause.Occur.SHOULD);

    var fieldQuery = new FieldQuery(either, true, true);
    var termStack = new FieldTermStack(reader, 0, F, fieldQuery);
    var phrases = new FieldPhraseList(termStack, fieldQuery);

    Assert.AreEqual(2, phrases.phraseList.Count);
    var first = phrases.phraseList.First;
    Assert.AreEqual("ab(1.0)((2,5))", first.Value.ToString());
    Assert.AreEqual("abc(1.0)((10,15))", first.Next.Value.ToString());
}