/// <summary>
/// Builds the phrase list by popping terms off <paramref name="fieldTermStack"/> and
/// matching them against the term/phrase maps exposed by <paramref name="fieldQuery"/>.
/// Processing stops when the stack is empty or phraseList has reached
/// <paramref name="phraseLimit"/> entries.
/// </summary>
/// <param name="fieldTermStack">FieldTermStack object</param>
/// <param name="fieldQuery">FieldQuery object</param>
/// <param name="phraseLimit">maximum size of phraseList</param>
public FieldPhraseList(FieldTermStack fieldTermStack, FieldQuery fieldQuery, int phraseLimit)
{
    String fieldName = fieldTermStack.FieldName;
    LinkedList<TermInfo> candidate = new LinkedList<TermInfo>();

    while (!fieldTermStack.IsEmpty() && phraseList.Count < phraseLimit)
    {
        candidate.Clear();

        TermInfo term = fieldTermStack.Pop();
        QueryPhraseMap matched = fieldQuery.GetFieldTermMap(fieldName, term.Text);
        if (matched == null)
        {
            // The popped term does not start any query term/phrase: drop it and move on.
            continue;
        }
        candidate.AddLast(term);

        // Phase 1: keep extending the candidate for as long as the next term on the
        // stack continues a phrase known to the current map (search the longest phrase).
        for (;;)
        {
            term = fieldTermStack.Pop();
            QueryPhraseMap deeper = (term == null) ? null : matched.GetTermMap(term.Text);
            if (deeper == null)
            {
                // The look-ahead term (if there was one) is not part of this phrase;
                // hand it back so the outer loop can consider it as a new start.
                if (term != null)
                {
                    fieldTermStack.Push(term);
                }
                break;
            }

            candidate.AddLast(term);
            matched = deeper;
        }

        // Phase 2: accept the candidate as-is when the map says it is valid ...
        if (matched.IsValidTermOrPhrase(new List<TermInfo>(candidate)))
        {
            AddIfNoOverlap(new WeightedPhraseInfo(candidate, matched.Boost, matched.TermOrPhraseNumber));
            continue;
        }

        // ... otherwise give back trailing terms one at a time until a shorter prefix
        // is found by SearchPhrase, or only a single term remains.
        while (candidate.Count > 1)
        {
            TermInfo tail = candidate.Last.Value;
            candidate.RemoveLast();
            fieldTermStack.Push(tail);

            matched = fieldQuery.SearchPhrase(fieldName, new List<TermInfo>(candidate));
            if (matched != null)
            {
                AddIfNoOverlap(new WeightedPhraseInfo(candidate, matched.Boost, matched.TermOrPhraseNumber));
                break;
            }
        }
    }
}
/// <summary>
/// A single-term query for "pc" against the index built by MakeIndex1w2w must
/// leave exactly one entry on the term stack.
/// </summary>
public void TestFieldTermStackIndex1w2wSearch1term()
{
    MakeIndex1w2w();

    FieldQuery fieldQuery = new FieldQuery(Tq("pc"), true, true);
    FieldTermStack termStack = new FieldTermStack(reader, 0, F, fieldQuery);

    Assert.AreEqual(1, termStack.termList.Count);

    String top = termStack.Pop().ToString();
    Assert.AreEqual("pc(3,5,1)", top);
}
/// <summary>
/// Indexes <paramref name="indexValue"/> via Make1d1fIndex, parses
/// <paramref name="queryValue"/> with paW, and returns the frag list produced by a
/// SimpleFragListBuilder with a fragment size of 20.
/// </summary>
/// <param name="queryValue">query text handed to paW.Parse</param>
/// <param name="indexValue">text handed to Make1d1fIndex</param>
/// <returns>the FieldFragList built for document 0, field F</returns>
private FieldFragList Ffl(String queryValue, String indexValue)
{
    Make1d1fIndex(indexValue);

    FieldQuery fieldQuery = new FieldQuery(paW.Parse(queryValue), true, true);
    FieldTermStack termStack = new FieldTermStack(reader, 0, F, fieldQuery);
    FieldPhraseList phrases = new FieldPhraseList(termStack, fieldQuery);

    SimpleFragListBuilder builder = new SimpleFragListBuilder();
    return builder.CreateFieldFragList(phrases, 20);
}
/// <summary>
/// A phrase query "personal computer" against the index built by MakeIndex1w2w
/// must put both phrase terms on the stack, in the order listed below.
/// </summary>
public void TestFieldTermStackIndex1w2wSearch1phrase()
{
    MakeIndex1w2w();

    FieldQuery fieldQuery = new FieldQuery(PqF("personal", "computer"), true, true);
    FieldTermStack termStack = new FieldTermStack(reader, 0, F, fieldQuery);

    String[] expectedInPopOrder =
    {
        "personal(3,5,1)",
        "computer(3,5,2)",
    };

    Assert.AreEqual(expectedInPopOrder.Length, termStack.termList.Count);
    foreach (String expected in expectedInPopOrder)
    {
        Assert.AreEqual(expected, termStack.Pop().ToString());
    }
}
/// <summary>
/// With "a a" indexed and a term query for "a", the phrase list must contain one
/// entry per occurrence.
/// </summary>
public void Test2TermsIndex()
{
    Make1d1fIndex("a a");

    FieldQuery fieldQuery = new FieldQuery(Tq("a"), true, true);
    FieldTermStack termStack = new FieldTermStack(reader, 0, F, fieldQuery);
    FieldPhraseList phrases = new FieldPhraseList(termStack, fieldQuery);

    Assert.AreEqual(2, phrases.phraseList.Count);

    String firstHit = phrases.phraseList.First.Value.ToString();
    Assert.AreEqual("a(1.0)((0,1))", firstHit);

    String secondHit = phrases.phraseList.First.Next.Value.ToString();
    Assert.AreEqual("a(1.0)((2,3))", secondHit);
}
/// <summary>
/// A phrase query "c d" against the index built by MakeIndex must yield the three
/// stack entries listed below, popped in that order.
/// </summary>
public void Test1Phrase()
{
    MakeIndex();

    FieldQuery fieldQuery = new FieldQuery(PqF("c", "d"), true, true);
    FieldTermStack termStack = new FieldTermStack(reader, 0, F, fieldQuery);

    String[] expectedInPopOrder =
    {
        "c(10,11,5)",
        "c(18,19,9)",
        "d(20,21,10)",
    };

    Assert.AreEqual(expectedInPopOrder.Length, termStack.termList.Count);
    foreach (String expected in expectedInPopOrder)
    {
        Assert.AreEqual(expected, termStack.Pop().ToString());
    }
}
/// <summary>
/// A term query for "a" against the index built by MakeIndex must yield the six
/// stack entries listed below, popped in that order.
/// </summary>
public void Test1Term()
{
    MakeIndex();

    FieldQuery fieldQuery = new FieldQuery(Tq("a"), true, true);
    FieldTermStack termStack = new FieldTermStack(reader, 0, F, fieldQuery);

    String[] expectedInPopOrder =
    {
        "a(0,1,0)",
        "a(2,3,1)",
        "a(4,5,2)",
        "a(12,13,6)",
        "a(28,29,14)",
        "a(32,33,16)",
    };

    Assert.AreEqual(expectedInPopOrder.Length, termStack.termList.Count);
    foreach (String expected in expectedInPopOrder)
    {
        Assert.AreEqual(expected, termStack.Pop().ToString());
    }
}
/// <summary>
/// A boolean query for "Mac" OR "MacBook" against the index built by MakeIndex1w
/// must put both terms on the stack. The pop order of the two entries is not
/// asserted, only that each expected entry is seen exactly once.
/// </summary>
public void TestFieldTermStackIndex1wSearch2terms()
{
    MakeIndex1w();

    BooleanQuery bq = new BooleanQuery();
    bq.Add(Tq("Mac"), Occur.SHOULD);
    bq.Add(Tq("MacBook"), Occur.SHOULD);

    FieldQuery fq = new FieldQuery(bq, true, true);
    FieldTermStack stack = new FieldTermStack(reader, 0, F, fq);
    Assert.AreEqual(2, stack.termList.Count);

    // Dictionary used as a set of the entries still to be seen (values are unused).
    Dictionary<String, String> remaining = new Dictionary<String, String>();
    remaining.Add("Mac(11,20,3)", "");
    remaining.Add("MacBook(11,20,3)", "");

    // Remove returns false for a key that is absent, so popping the same entry
    // twice (or an unexpected entry) now fails instead of passing silently.
    Assert.IsTrue(remaining.Remove(stack.Pop().ToString()));
    Assert.IsTrue(remaining.Remove(stack.Pop().ToString()));
}
/// <summary>
/// With "a b" indexed: a phrase query "a b" must produce a single phrase entry,
/// and a term query "b" must produce a single term entry.
/// </summary>
public void Test1PhraseIndex()
{
    Make1d1fIndex("a b");

    // Scenario 1: phrase query.
    FieldQuery phraseQuery = new FieldQuery(PqF("a", "b"), true, true);
    FieldTermStack phraseStack = new FieldTermStack(reader, 0, F, phraseQuery);
    FieldPhraseList phraseHits = new FieldPhraseList(phraseStack, phraseQuery);

    Assert.AreEqual(1, phraseHits.phraseList.Count);
    Assert.AreEqual("ab(1.0)((0,3))", phraseHits.phraseList.First.Value.ToString());

    // Scenario 2: single-term query against the same index.
    FieldQuery termQuery = new FieldQuery(Tq("b"), true, true);
    FieldTermStack termStack = new FieldTermStack(reader, 0, F, termQuery);
    FieldPhraseList termHits = new FieldPhraseList(termStack, termQuery);

    Assert.AreEqual(1, termHits.phraseList.Count);
    Assert.AreEqual("b(1.0)((2,3))", termHits.phraseList.First.Value.ToString());
}
/// <summary>
/// A boolean query for "b" OR "c" against the index built by MakeIndex must yield
/// the eight stack entries listed below, popped in that order.
/// </summary>
public void Test2Terms()
{
    MakeIndex();

    BooleanQuery eitherTerm = new BooleanQuery();
    eitherTerm.Add(Tq("b"), Occur.SHOULD);
    eitherTerm.Add(Tq("c"), Occur.SHOULD);

    FieldQuery fieldQuery = new FieldQuery(eitherTerm, true, true);
    FieldTermStack termStack = new FieldTermStack(reader, 0, F, fieldQuery);

    String[] expectedInPopOrder =
    {
        "b(6,7,3)",
        "b(8,9,4)",
        "c(10,11,5)",
        "b(14,15,7)",
        "b(16,17,8)",
        "c(18,19,9)",
        "b(26,27,13)",
        "b(30,31,15)",
    };

    Assert.AreEqual(expectedInPopOrder.Length, termStack.termList.Count);
    foreach (String expected in expectedInPopOrder)
    {
        Assert.AreEqual(expected, termStack.Pop().ToString());
    }
}
/// <summary>
/// The four bigrams of "speed", queried as one phrase against the index built by
/// MakeIndexLongMVB, must collapse into a single phrase entry.
/// </summary>
public void Test1PhraseLongMVB()
{
    MakeIndexLongMVB();

    // "speed" -(2gram)-> "sp","pe","ee","ed"
    FieldQuery bigramPhrase = new FieldQuery(PqF("sp", "pe", "ee", "ed"), true, true);
    FieldTermStack termStack = new FieldTermStack(reader, 0, F, bigramPhrase);
    FieldPhraseList phrases = new FieldPhraseList(termStack, bigramPhrase);

    Assert.AreEqual(1, phrases.phraseList.Count);

    String onlyHit = phrases.phraseList.First.Value.ToString();
    Assert.AreEqual("sppeeeed(1.0)((88,93))", onlyHit);
}
/// <summary>
/// A term query for "d" against the index built by MakeIndexShortMV must produce
/// exactly one phrase-list entry.
/// </summary>
public void Test1PhraseShortMV()
{
    MakeIndexShortMV();

    FieldQuery fieldQuery = new FieldQuery(Tq("d"), true, true);
    FieldTermStack termStack = new FieldTermStack(reader, 0, F, fieldQuery);
    FieldPhraseList phrases = new FieldPhraseList(termStack, fieldQuery);

    Assert.AreEqual(1, phrases.phraseList.Count);

    String onlyHit = phrases.phraseList.First.Value.ToString();
    Assert.AreEqual("d(1.0)((6,7))", onlyHit);
}
/// <summary>
/// A phrase query "search engines" against the index built by MakeIndexLongMV must
/// produce two phrase-list entries, in the order asserted below.
/// </summary>
public void Test1PhraseLongMV()
{
    MakeIndexLongMV();

    FieldQuery fieldQuery = new FieldQuery(PqF("search", "engines"), true, true);
    FieldTermStack termStack = new FieldTermStack(reader, 0, F, fieldQuery);
    FieldPhraseList phrases = new FieldPhraseList(termStack, fieldQuery);

    Assert.AreEqual(2, phrases.phraseList.Count);

    String firstHit = phrases.phraseList.First.Value.ToString();
    Assert.AreEqual("searchengines(1.0)((102,116))", firstHit);

    String secondHit = phrases.phraseList.First.Next.Value.ToString();
    Assert.AreEqual("searchengines(1.0)((157,171))", secondHit);
}
/// <summary>
/// With "d a b a b c d" indexed, the three-term phrase "a b c" must match once;
/// the earlier partial "a b" must not be reported.
/// </summary>
public void Test3TermsPhrase()
{
    Make1d1fIndex("d a b a b c d");

    FieldQuery fieldQuery = new FieldQuery(PqF("a", "b", "c"), true, true);
    FieldTermStack termStack = new FieldTermStack(reader, 0, F, fieldQuery);
    FieldPhraseList phrases = new FieldPhraseList(termStack, fieldQuery);

    Assert.AreEqual(1, phrases.phraseList.Count);

    String onlyHit = phrases.phraseList.First.Value.ToString();
    Assert.AreEqual("abc(1.0)((6,11))", onlyHit);
}
/// <summary>
/// With both "a b" and "a b c" as optional phrase clauses, the text
/// "d a b d c a b c" must yield the short phrase for the first occurrence and the
/// long phrase for the second.
/// </summary>
public void TestSearchLongestPhrase()
{
    Make1d1fIndex("d a b d c a b c");

    BooleanQuery eitherPhrase = new BooleanQuery();
    eitherPhrase.Add(PqF("a", "b"), Lucene.Net.Search.BooleanClause.Occur.SHOULD);
    eitherPhrase.Add(PqF("a", "b", "c"), Lucene.Net.Search.BooleanClause.Occur.SHOULD);

    FieldQuery fieldQuery = new FieldQuery(eitherPhrase, true, true);
    FieldTermStack termStack = new FieldTermStack(reader, 0, F, fieldQuery);
    FieldPhraseList phrases = new FieldPhraseList(termStack, fieldQuery);

    Assert.AreEqual(2, phrases.phraseList.Count);

    String firstHit = phrases.phraseList.First.Value.ToString();
    Assert.AreEqual("ab(1.0)((2,5))", firstHit);

    String secondHit = phrases.phraseList.First.Next.Value.ToString();
    Assert.AreEqual("abc(1.0)((10,15))", secondHit);
}
/// <summary>
/// For the index built by MakeUnstoredIndex, SimpleFragmentsBuilder.CreateFragment
/// must return null for a term query on "aaa".
/// </summary>
public void TestUnstoredField()
{
    MakeUnstoredIndex();

    FieldQuery fieldQuery = new FieldQuery(Tq("aaa"), true, true);
    FieldTermStack termStack = new FieldTermStack(reader, 0, F, fieldQuery);
    FieldPhraseList phrases = new FieldPhraseList(termStack, fieldQuery);

    FieldFragList fragList = new SimpleFragListBuilder().CreateFieldFragList(phrases, 100);
    SimpleFragmentsBuilder fragmentsBuilder = new SimpleFragmentsBuilder();

    Assert.IsNull(fragmentsBuilder.CreateFragment(reader, 0, F, fragList));
}
/// <summary>
/// With "c a a b c" indexed, the phrase built by pqF(2F, 1, "a", "c") must match
/// once, reporting two separate term ranges and overall start/end offsets 4 and 9.
/// </summary>
public void TestPhraseSlop()
{
    Make1d1fIndex("c a a b c");

    FieldQuery sloppyPhrase = new FieldQuery(pqF(2F, 1, "a", "c"), true, true);
    FieldTermStack termStack = new FieldTermStack(reader, 0, F, sloppyPhrase);
    FieldPhraseList phrases = new FieldPhraseList(termStack, sloppyPhrase);

    Assert.AreEqual(1, phrases.phraseList.Count);

    String onlyHit = phrases.phraseList.First.Value.ToString();
    Assert.AreEqual("ac(2.0)((4,5)(8,9))", onlyHit);

    // The overall span runs from the start of the first range to the end of the last.
    Assert.AreEqual(4, phrases.phraseList.First.Value.GetStartOffset());
    Assert.AreEqual(9, phrases.phraseList.First.Value.GetEndOffset());
}
/// <summary>
/// Runs the term-stack and phrase-list stages for one document field, then asks
/// fragListBuilder for the resulting frag list. The phrase list is capped at
/// phraseLimit.
/// </summary>
/// <param name="fieldQuery">FieldQuery object</param>
/// <param name="reader">IndexReader passed to the FieldTermStack</param>
/// <param name="docId">document id passed to the FieldTermStack</param>
/// <param name="fieldName">field name passed to the FieldTermStack</param>
/// <param name="fragCharSize">fragment size passed to fragListBuilder</param>
/// <returns>the FieldFragList created by fragListBuilder</returns>
private FieldFragList GetFieldFragList(FieldQuery fieldQuery, IndexReader reader, int docId, String fieldName, int fragCharSize)
{
    FieldTermStack terms = new FieldTermStack(reader, docId, fieldName, fieldQuery);
    FieldPhraseList phrases = new FieldPhraseList(terms, fieldQuery, phraseLimit);

    FieldFragList fragList = fragListBuilder.CreateFieldFragList(phrases, fragCharSize);
    return fragList;
}
/// <summary>
/// With "bbbacbabc" indexed via Make1d1fIndexB, the phrase "ba ac" must match
/// exactly once.
/// </summary>
public void Test1PhraseIndexB()
{
    // 01 12 23 34 45 56 67 78 (offsets)
    // bb|bb|ba|ac|cb|ba|ab|bc
    //  0  1  2  3  4  5  6  7 (positions)
    Make1d1fIndexB("bbbacbabc");

    FieldQuery fieldQuery = new FieldQuery(PqF("ba", "ac"), true, true);
    FieldTermStack termStack = new FieldTermStack(reader, 0, F, fieldQuery);
    FieldPhraseList phrases = new FieldPhraseList(termStack, fieldQuery);

    Assert.AreEqual(1, phrases.phraseList.Count);

    String onlyHit = phrases.phraseList.First.Value.ToString();
    Assert.AreEqual("baac(1.0)((2,5))", onlyHit);
}
/// <summary>
/// The four bigrams of "speed", queried as one phrase against the index built by
/// MakeIndexLongMVB, must each appear once on the term stack, in the order listed below.
/// </summary>
public void Test1PhraseMVB()
{
    MakeIndexLongMVB();

    // "speed" -(2gram)-> "sp","pe","ee","ed"
    FieldQuery bigramPhrase = new FieldQuery(PqF("sp", "pe", "ee", "ed"), true, true);
    FieldTermStack termStack = new FieldTermStack(reader, 0, F, bigramPhrase);

    String[] expectedInPopOrder =
    {
        "sp(88,90,61)",
        "pe(89,91,62)",
        "ee(90,92,63)",
        "ed(91,93,64)",
    };

    Assert.AreEqual(expectedInPopOrder.Length, termStack.termList.Count);
    foreach (String expected in expectedInPopOrder)
    {
        Assert.AreEqual(expected, termStack.Pop().ToString());
    }
}
/// <summary>
/// A phrase query "search engines" against the index built by MakeIndexLongMV must
/// yield the four stack entries listed below, popped in that order.
/// </summary>
public void Test1PhraseLongMV()
{
    MakeIndexLongMV();

    FieldQuery fieldQuery = new FieldQuery(PqF("search", "engines"), true, true);
    FieldTermStack termStack = new FieldTermStack(reader, 0, F, fieldQuery);

    String[] expectedInPopOrder =
    {
        "search(102,108,14)",
        "engines(109,116,15)",
        "search(157,163,24)",
        "engines(164,171,25)",
    };

    Assert.AreEqual(expectedInPopOrder.Length, termStack.termList.Count);
    foreach (String expected in expectedInPopOrder)
    {
        Assert.AreEqual(expected, termStack.Pop().ToString());
    }
}
/// <summary>
/// create a FieldPhraseList that has no limit on the number of phrases to analyze
/// </summary>
/// <param name="fieldTermStack">FieldTermStack object</param>
/// <param name="fieldQuery">FieldQuery object</param>
public FieldPhraseList(FieldTermStack fieldTermStack, FieldQuery fieldQuery)
    : this(fieldTermStack, fieldQuery, int.MaxValue) // int.MaxValue acts as "no limit"
{
}
/// <summary>
/// End-to-end check for the index built by MakeIndexLongMV: a phrase query
/// "search engines" with a fragment size of 100 must produce the fragment below,
/// with both occurrences wrapped in &lt;b&gt; tags.
/// </summary>
public void Test1PhraseLongMV()
{
    MakeIndexLongMV();

    FieldQuery fieldQuery = new FieldQuery(PqF("search", "engines"), true, true);
    FieldTermStack termStack = new FieldTermStack(reader, 0, F, fieldQuery);
    FieldPhraseList phrases = new FieldPhraseList(termStack, fieldQuery);

    FieldFragList fragList = new SimpleFragListBuilder().CreateFieldFragList(phrases, 100);
    SimpleFragmentsBuilder fragmentsBuilder = new SimpleFragmentsBuilder();

    Assert.AreEqual(
        " most <b>search engines</b> use only one of these methods. Even the <b>search engines</b> that says they can use t",
        fragmentsBuilder.CreateFragment(reader, 0, F, fragList));
}
/// <summary>
/// End-to-end check for the index built by MakeIndexShortMV: a term query for "d"
/// with a fragment size of 100 must produce the fragment below, with the match
/// wrapped in &lt;b&gt; tags.
/// </summary>
public void Test1PhraseShortMV()
{
    MakeIndexShortMV();

    FieldQuery fieldQuery = new FieldQuery(Tq("d"), true, true);
    FieldTermStack termStack = new FieldTermStack(reader, 0, F, fieldQuery);
    FieldPhraseList phrases = new FieldPhraseList(termStack, fieldQuery);

    FieldFragList fragList = new SimpleFragListBuilder().CreateFieldFragList(phrases, 100);
    SimpleFragmentsBuilder fragmentsBuilder = new SimpleFragmentsBuilder();

    Assert.AreEqual("a b c <b>d</b> e", fragmentsBuilder.CreateFragment(reader, 0, F, fragList));
}
/// <summary>
/// A term query for "d" against the index built by MakeIndexShortMV must leave
/// exactly one entry on the term stack.
/// </summary>
public void Test1PhraseShortMV()
{
    MakeIndexShortMV();

    FieldQuery fieldQuery = new FieldQuery(Tq("d"), true, true);
    FieldTermStack termStack = new FieldTermStack(reader, 0, F, fieldQuery);

    Assert.AreEqual(1, termStack.termList.Count);

    String top = termStack.Pop().ToString();
    Assert.AreEqual("d(6,7,3)", top);
}
/// <summary>
/// A term query for "ab" against the index built by makeIndexB must yield the two
/// stack entries listed below, popped in that order.
/// </summary>
public void Test1TermB()
{
    makeIndexB();

    FieldQuery fieldQuery = new FieldQuery(Tq("ab"), true, true);
    FieldTermStack termStack = new FieldTermStack(reader, 0, F, fieldQuery);

    String[] expectedInPopOrder =
    {
        "ab(2,4,2)",
        "ab(6,8,6)",
    };

    Assert.AreEqual(expectedInPopOrder.Length, termStack.termList.Count);
    foreach (String expected in expectedInPopOrder)
    {
        Assert.AreEqual(expected, termStack.Pop().ToString());
    }
}
/// <summary>
/// With "d a b c d" indexed and the overlapping optional phrases "a b" and "b c",
/// the phrase list must contain a single merged entry rather than two.
/// </summary>
public void Test2PhrasesOverlap()
{
    Make1d1fIndex("d a b c d");

    BooleanQuery overlappingPhrases = new BooleanQuery();
    overlappingPhrases.Add(PqF("a", "b"), Occur.SHOULD);
    overlappingPhrases.Add(PqF("b", "c"), Occur.SHOULD);

    FieldQuery fieldQuery = new FieldQuery(overlappingPhrases, true, true);
    FieldTermStack termStack = new FieldTermStack(reader, 0, F, fieldQuery);
    FieldPhraseList phrases = new FieldPhraseList(termStack, fieldQuery);

    Assert.AreEqual(1, phrases.phraseList.Count);

    String onlyHit = phrases.phraseList.First.Value.ToString();
    Assert.AreEqual("abc(1.0)((2,7))", onlyHit);
}
/// <summary>
/// End-to-end check for the index built by MakeIndexLongMVB: the four bigrams of
/// "speed" queried as one phrase, with a fragment size of 100, must produce the
/// fragment below with the whole word wrapped in &lt;b&gt; tags.
/// </summary>
public void Test1PhraseLongMVB()
{
    MakeIndexLongMVB();

    // "speed" -(2gram)-> "sp","pe","ee","ed"
    FieldQuery bigramPhrase = new FieldQuery(PqF("sp", "pe", "ee", "ed"), true, true);
    FieldTermStack termStack = new FieldTermStack(reader, 0, F, bigramPhrase);
    FieldPhraseList phrases = new FieldPhraseList(termStack, bigramPhrase);

    FieldFragList fragList = new SimpleFragListBuilder().CreateFieldFragList(phrases, 100);
    SimpleFragmentsBuilder fragmentsBuilder = new SimpleFragmentsBuilder();

    Assert.AreEqual("ssing <b>speed</b>, the", fragmentsBuilder.CreateFragment(reader, 0, F, fragList));
}
/// <summary>
/// With "abab" indexed via Make1d1fIndexB, a term query for "ab" must produce two
/// separate phrase-list entries.
/// </summary>
public void Test2ConcatTermsIndexB()
{
    // 01 12 23 (offsets)
    // ab|ba|ab
    //  0  1  2 (positions)
    Make1d1fIndexB("abab");

    FieldQuery fieldQuery = new FieldQuery(Tq("ab"), true, true);
    FieldTermStack termStack = new FieldTermStack(reader, 0, F, fieldQuery);
    FieldPhraseList phrases = new FieldPhraseList(termStack, fieldQuery);

    Assert.AreEqual(2, phrases.phraseList.Count);

    String firstHit = phrases.phraseList.First.Value.ToString();
    Assert.AreEqual("ab(1.0)((0,2))", firstHit);

    String secondHit = phrases.phraseList.First.Next.Value.ToString();
    Assert.AreEqual("ab(1.0)((2,4))", secondHit);
}
/// <summary>
/// With "c a a b" indexed and the phrase query "a b": when the FieldQuery is built
/// with phraseHighlight = true only the phrase is reported; with
/// phraseHighlight = false the stray leading "a" is reported as well.
/// </summary>
public void Test2Terms1PhraseIndex()
{
    Make1d1fIndex("c a a b");

    // phraseHighlight = true
    FieldQuery strictQuery = new FieldQuery(PqF("a", "b"), true, true);
    FieldTermStack strictStack = new FieldTermStack(reader, 0, F, strictQuery);
    FieldPhraseList strictHits = new FieldPhraseList(strictStack, strictQuery);

    Assert.AreEqual(1, strictHits.phraseList.Count);
    Assert.AreEqual("ab(1.0)((4,7))", strictHits.phraseList.First.Value.ToString());

    // phraseHighlight = false
    FieldQuery looseQuery = new FieldQuery(PqF("a", "b"), false, true);
    FieldTermStack looseStack = new FieldTermStack(reader, 0, F, looseQuery);
    FieldPhraseList looseHits = new FieldPhraseList(looseStack, looseQuery);

    Assert.AreEqual(2, looseHits.phraseList.Count);
    Assert.AreEqual("a(1.0)((2,3))", looseHits.phraseList.First.Value.ToString());
    Assert.AreEqual("ab(1.0)((4,7))", looseHits.phraseList.First.Next.Value.ToString());
}