예제 #1
0
        /// <summary>
        /// Builds the phrase list for one field by consuming terms from
        /// <paramref name="fieldTermStack"/> and matching them against the term/phrase maps
        /// obtained from <paramref name="fieldQuery"/>.
        /// <para>
        /// Terms are popped one at a time. When a popped term has a map entry for the field,
        /// the following terms are appended to a candidate for as long as the current map can be
        /// extended by the next term. The longest candidate is handed to AddIfNoOverlap if the
        /// map reports it as a valid term or phrase; otherwise trailing terms are pushed back
        /// onto the stack one by one and the shortened candidate is looked up again through
        /// SearchPhrase.
        /// </para>
        /// </summary>
        /// <param name="fieldTermStack">FieldTermStack object; it is modified, since terms are popped from it and some are pushed back</param>
        /// <param name="fieldQuery">FieldQuery object used to look up term and phrase maps for the field</param>
        /// <param name="phraseLimit">maximum size of phraseList; the scan stops once phraseList.Count is no longer below it</param>
        public FieldPhraseList(FieldTermStack fieldTermStack, FieldQuery fieldQuery, int phraseLimit)
        {
            String field = fieldTermStack.FieldName;

            // Terms collected for the phrase currently being matched (reused across iterations).
            LinkedList<TermInfo> phraseCandidate = new LinkedList<TermInfo>();
            // Map reached by the terms accepted so far, and the map reached by adding one more term.
            QueryPhraseMap currMap = null;
            QueryPhraseMap nextMap = null;
            // The limit is only checked here, i.e. once per candidate start term.
            while (!fieldTermStack.IsEmpty() && (phraseList.Count < phraseLimit) )
            {

                phraseCandidate.Clear();

                TermInfo ti = fieldTermStack.Pop();
                currMap = fieldQuery.GetFieldTermMap(field, ti.Text);

                // if not found, discard top TermInfo from stack, then try next element
                if (currMap == null) continue;

                // if found, search the longest phrase
                phraseCandidate.AddLast(ti);
                while (true)
                {
                    // NOTE(review): Pop is null-checked below, so it presumably returns null
                    // when the stack is exhausted -- confirm against FieldTermStack.
                    ti = fieldTermStack.Pop();
                    nextMap = null;
                    if (ti != null)
                        nextMap = currMap.GetTermMap(ti.Text);
                    if (ti == null || nextMap == null)
                    {
                        // The candidate cannot be extended. A term that was popped but does not
                        // continue the phrase goes back on the stack for the next outer iteration.
                        if (ti != null)
                            fieldTermStack.Push(ti);
                        // A List copy of the candidate is passed to the validity check.
                        if (currMap.IsValidTermOrPhrase(new List<TermInfo>(phraseCandidate)))
                        {
                            AddIfNoOverlap(new WeightedPhraseInfo(phraseCandidate, currMap.Boost, currMap.TermOrPhraseNumber));
                        }
                        else
                        {
                            // Longest candidate is not valid: shrink it from the tail, returning
                            // each removed term to the stack, until a shorter prefix is found by
                            // SearchPhrase or only the first term is left.
                            while (phraseCandidate.Count > 1)
                            {
                                TermInfo last = phraseCandidate.Last.Value;
                                phraseCandidate.RemoveLast();
                                fieldTermStack.Push(last);
                                currMap = fieldQuery.SearchPhrase(field, new List<TermInfo>(phraseCandidate));
                                if (currMap != null)
                                {
                                    AddIfNoOverlap(new WeightedPhraseInfo(phraseCandidate, currMap.Boost, currMap.TermOrPhraseNumber));
                                    break;
                                }
                            }
                        }
                        // Done with this start term; return to the outer loop.
                        break;
                    }
                    else
                    {
                        // The next term extends the phrase: accept it and descend into its map.
                        phraseCandidate.AddLast(ti);
                        currMap = nextMap;
                    }
                }
            }
        }
예제 #2
0
        /// <summary>
        /// Term query "pc" against the index built by MakeIndex1w2w: the term stack must hold
        /// exactly one entry, rendered as "pc(3,5,1)".
        /// </summary>
        public void TestFieldTermStackIndex1w2wSearch1term()
        {
            MakeIndex1w2w();
            FieldQuery termQuery = new FieldQuery(Tq("pc"), true, true);
            FieldTermStack termStack = new FieldTermStack(reader, 0, F, termQuery);

            Assert.AreEqual(1, termStack.termList.Count);

            string onlyTerm = termStack.Pop().ToString();
            Assert.AreEqual("pc(3,5,1)", onlyTerm);
        }
 /// <summary>
 /// Test helper: calls Make1d1fIndex with <paramref name="indexValue"/>, parses
 /// <paramref name="queryValue"/> with paW, and returns the frag list that a
 /// SimpleFragListBuilder creates for document 0 of field F, passing 20 as the
 /// second argument of CreateFieldFragList.
 /// </summary>
 /// <param name="queryValue">query text handed to the parser</param>
 /// <param name="indexValue">text handed to Make1d1fIndex</param>
 /// <returns>the FieldFragList produced by SimpleFragListBuilder</returns>
 private FieldFragList Ffl(String queryValue, String indexValue)
 {
     Make1d1fIndex(indexValue);

     FieldQuery fieldQuery = new FieldQuery(paW.Parse(queryValue), true, true);
     FieldTermStack termStack = new FieldTermStack(reader, 0, F, fieldQuery);
     FieldPhraseList phrases = new FieldPhraseList(termStack, fieldQuery);

     SimpleFragListBuilder builder = new SimpleFragListBuilder();
     return builder.CreateFieldFragList(phrases, 20);
 }
예제 #4
0
        /// <summary>
        /// Phrase query ("personal", "computer") against the index built by MakeIndex1w2w:
        /// the term stack must hold two entries, popped as "personal(3,5,1)" and then
        /// "computer(3,5,2)".
        /// </summary>
        public void TestFieldTermStackIndex1w2wSearch1phrase()
        {
            MakeIndex1w2w();
            FieldQuery phraseQuery = new FieldQuery(PqF("personal", "computer"), true, true);
            FieldTermStack termStack = new FieldTermStack(reader, 0, F, phraseQuery);

            Assert.AreEqual(2, termStack.termList.Count);

            // Entries are checked in pop order.
            string[] expectedTerms = { "personal(3,5,1)", "computer(3,5,2)" };
            foreach (string expectedTerm in expectedTerms)
            {
                Assert.AreEqual(expectedTerm, termStack.Pop().ToString());
            }
        }
예제 #5
0
        /// <summary>
        /// Term query "a" against the text "a a": the phrase list must contain two entries,
        /// "a(1.0)((0,1))" followed by "a(1.0)((2,3))".
        /// </summary>
        public void Test2TermsIndex()
        {
            Make1d1fIndex("a a");
            FieldQuery termQuery = new FieldQuery(Tq("a"), true, true);
            FieldTermStack termStack = new FieldTermStack(reader, 0, F, termQuery);
            FieldPhraseList phrases = new FieldPhraseList(termStack, termQuery);

            Assert.AreEqual(2, phrases.phraseList.Count);

            string firstPhrase = phrases.phraseList.First.Value.ToString();
            Assert.AreEqual("a(1.0)((0,1))", firstPhrase);

            string secondPhrase = phrases.phraseList.First.Next.Value.ToString();
            Assert.AreEqual("a(1.0)((2,3))", secondPhrase);
        }
예제 #6
0
        /// <summary>
        /// Phrase query ("c", "d") against the index built by MakeIndex: the term stack must
        /// hold three entries, popped as "c(10,11,5)", "c(18,19,9)" and "d(20,21,10)".
        /// </summary>
        public void Test1Phrase()
        {
            MakeIndex();
            FieldQuery phraseQuery = new FieldQuery(PqF("c", "d"), true, true);
            FieldTermStack termStack = new FieldTermStack(reader, 0, F, phraseQuery);

            Assert.AreEqual(3, termStack.termList.Count);

            // Entries are checked in pop order.
            string[] expectedTerms = { "c(10,11,5)", "c(18,19,9)", "d(20,21,10)" };
            foreach (string expectedTerm in expectedTerms)
            {
                Assert.AreEqual(expectedTerm, termStack.Pop().ToString());
            }
        }
예제 #7
0
        /// <summary>
        /// Term query "a" against the index built by MakeIndex: the term stack must hold six
        /// entries, popped in the order listed in the expected array below.
        /// </summary>
        public void Test1Term()
        {
            MakeIndex();
            FieldQuery termQuery = new FieldQuery(Tq("a"), true, true);
            FieldTermStack termStack = new FieldTermStack(reader, 0, F, termQuery);

            Assert.AreEqual(6, termStack.termList.Count);

            // Entries are checked in pop order.
            string[] expectedTerms =
            {
                "a(0,1,0)",
                "a(2,3,1)",
                "a(4,5,2)",
                "a(12,13,6)",
                "a(28,29,14)",
                "a(32,33,16)"
            };
            foreach (string expectedTerm in expectedTerms)
            {
                Assert.AreEqual(expectedTerm, termStack.Pop().ToString());
            }
        }
예제 #8
0
        /// <summary>
        /// Boolean SHOULD query over the terms "Mac" and "MacBook" against the index built by
        /// MakeIndex1w: the term stack must hold two entries, "Mac(11,20,3)" and
        /// "MacBook(11,20,3)", in either pop order.
        /// </summary>
        public void TestFieldTermStackIndex1wSearch2terms()
        {
            MakeIndex1w();

            BooleanQuery bq = new BooleanQuery();
            bq.Add(Tq("Mac"), Occur.SHOULD);
            bq.Add(Tq("MacBook"), Occur.SHOULD);
            FieldQuery fq = new FieldQuery(bq, true, true);
            FieldTermStack stack = new FieldTermStack(reader, 0, F, fq);
            Assert.AreEqual(2, stack.termList.Count);

            // The dictionary is used as a set of entries that have not been seen yet.
            Dictionary<String, String> expectedSet = new Dictionary<String, String>();
            expectedSet.Add("Mac(11,20,3)", "");
            expectedSet.Add("MacBook(11,20,3)", "");

            // Remove (instead of ContainsKey) consumes each expected entry, so the test
            // fails if the same term is popped twice rather than both terms once each.
            Assert.IsTrue(expectedSet.Remove(stack.Pop().ToString()));
            Assert.IsTrue(expectedSet.Remove(stack.Pop().ToString()));
        }
예제 #9
0
        /// <summary>
        /// Against the text "a b": the phrase query ("a", "b") must yield the single phrase
        /// "ab(1.0)((0,3))", and the term query "b" must yield the single phrase
        /// "b(1.0)((2,3))".
        /// </summary>
        public void Test1PhraseIndex()
        {
            Make1d1fIndex("a b");

            // Scenario 1: phrase query "a b".
            FieldQuery phraseQuery = new FieldQuery(PqF("a", "b"), true, true);
            FieldTermStack phraseStack = new FieldTermStack(reader, 0, F, phraseQuery);
            FieldPhraseList phraseResult = new FieldPhraseList(phraseStack, phraseQuery);
            Assert.AreEqual(1, phraseResult.phraseList.Count);
            string phraseText = phraseResult.phraseList.First.Value.ToString();
            Assert.AreEqual("ab(1.0)((0,3))", phraseText);

            // Scenario 2: term query "b" on the same index.
            FieldQuery termQuery = new FieldQuery(Tq("b"), true, true);
            FieldTermStack termStack = new FieldTermStack(reader, 0, F, termQuery);
            FieldPhraseList termResult = new FieldPhraseList(termStack, termQuery);
            Assert.AreEqual(1, termResult.phraseList.Count);
            string termText = termResult.phraseList.First.Value.ToString();
            Assert.AreEqual("b(1.0)((2,3))", termText);
        }
예제 #10
0
        /// <summary>
        /// Boolean SHOULD query over the terms "b" and "c" against the index built by
        /// MakeIndex: the term stack must hold eight entries, popped in the order listed in
        /// the expected array below.
        /// </summary>
        public void Test2Terms()
        {
            MakeIndex();

            BooleanQuery eitherTerm = new BooleanQuery();
            eitherTerm.Add(Tq("b"), Occur.SHOULD);
            eitherTerm.Add(Tq("c"), Occur.SHOULD);

            FieldQuery fieldQuery = new FieldQuery(eitherTerm, true, true);
            FieldTermStack termStack = new FieldTermStack(reader, 0, F, fieldQuery);

            Assert.AreEqual(8, termStack.termList.Count);

            // Entries are checked in pop order.
            string[] expectedTerms =
            {
                "b(6,7,3)",
                "b(8,9,4)",
                "c(10,11,5)",
                "b(14,15,7)",
                "b(16,17,8)",
                "c(18,19,9)",
                "b(26,27,13)",
                "b(30,31,15)"
            };
            foreach (string expectedTerm in expectedTerms)
            {
                Assert.AreEqual(expectedTerm, termStack.Pop().ToString());
            }
        }
예제 #11
0
        /// <summary>
        /// Phrase query ("sp", "pe", "ee", "ed") against the index built by MakeIndexLongMVB:
        /// the phrase list must contain the single phrase "sppeeeed(1.0)((88,93))".
        /// </summary>
        public void Test1PhraseLongMVB()
        {
            MakeIndexLongMVB();

            // "speed" -(2gram)-> "sp","pe","ee","ed"
            FieldQuery bigramQuery = new FieldQuery(PqF("sp", "pe", "ee", "ed"), true, true);
            FieldTermStack termStack = new FieldTermStack(reader, 0, F, bigramQuery);
            FieldPhraseList phrases = new FieldPhraseList(termStack, bigramQuery);

            Assert.AreEqual(1, phrases.phraseList.Count);

            string onlyPhrase = phrases.phraseList.First.Value.ToString();
            Assert.AreEqual("sppeeeed(1.0)((88,93))", onlyPhrase);
        }
예제 #12
0
        /// <summary>
        /// Term query "d" against the index built by MakeIndexShortMV: the phrase list must
        /// contain the single phrase "d(1.0)((6,7))".
        /// </summary>
        public void Test1PhraseShortMV()
        {
            MakeIndexShortMV();
            FieldQuery termQuery = new FieldQuery(Tq("d"), true, true);
            FieldTermStack termStack = new FieldTermStack(reader, 0, F, termQuery);
            FieldPhraseList phrases = new FieldPhraseList(termStack, termQuery);

            Assert.AreEqual(1, phrases.phraseList.Count);

            string onlyPhrase = phrases.phraseList.First.Value.ToString();
            Assert.AreEqual("d(1.0)((6,7))", onlyPhrase);
        }
예제 #13
0
        /// <summary>
        /// Phrase query ("search", "engines") against the index built by MakeIndexLongMV: the
        /// phrase list must contain two phrases, "searchengines(1.0)((102,116))" followed by
        /// "searchengines(1.0)((157,171))".
        /// </summary>
        public void Test1PhraseLongMV()
        {
            MakeIndexLongMV();
            FieldQuery phraseQuery = new FieldQuery(PqF("search", "engines"), true, true);
            FieldTermStack termStack = new FieldTermStack(reader, 0, F, phraseQuery);
            FieldPhraseList phrases = new FieldPhraseList(termStack, phraseQuery);

            Assert.AreEqual(2, phrases.phraseList.Count);

            string firstPhrase = phrases.phraseList.First.Value.ToString();
            Assert.AreEqual("searchengines(1.0)((102,116))", firstPhrase);

            string secondPhrase = phrases.phraseList.First.Next.Value.ToString();
            Assert.AreEqual("searchengines(1.0)((157,171))", secondPhrase);
        }
예제 #14
0
        /// <summary>
        /// Phrase query ("a", "b", "c") against the text "d a b a b c d": the phrase list must
        /// contain the single phrase "abc(1.0)((6,11))".
        /// </summary>
        public void Test3TermsPhrase()
        {
            Make1d1fIndex("d a b a b c d");
            FieldQuery phraseQuery = new FieldQuery(PqF("a", "b", "c"), true, true);
            FieldTermStack termStack = new FieldTermStack(reader, 0, F, phraseQuery);
            FieldPhraseList phrases = new FieldPhraseList(termStack, phraseQuery);

            Assert.AreEqual(1, phrases.phraseList.Count);

            string onlyPhrase = phrases.phraseList.First.Value.ToString();
            Assert.AreEqual("abc(1.0)((6,11))", onlyPhrase);
        }
예제 #15
0
        /// <summary>
        /// Boolean SHOULD query over the phrases ("a", "b") and ("a", "b", "c") against the
        /// text "d a b d c a b c": the phrase list must contain "ab(1.0)((2,5))" followed by
        /// "abc(1.0)((10,15))".
        /// </summary>
        public void TestSearchLongestPhrase()
        {
            Make1d1fIndex("d a b d c a b c");

            BooleanQuery eitherPhrase = new BooleanQuery();
            eitherPhrase.Add(PqF("a", "b"), Lucene.Net.Search.BooleanClause.Occur.SHOULD);
            eitherPhrase.Add(PqF("a", "b", "c"), Lucene.Net.Search.BooleanClause.Occur.SHOULD);

            FieldQuery fieldQuery = new FieldQuery(eitherPhrase, true, true);
            FieldTermStack termStack = new FieldTermStack(reader, 0, F, fieldQuery);
            FieldPhraseList phrases = new FieldPhraseList(termStack, fieldQuery);

            Assert.AreEqual(2, phrases.phraseList.Count);

            string firstPhrase = phrases.phraseList.First.Value.ToString();
            Assert.AreEqual("ab(1.0)((2,5))", firstPhrase);

            string secondPhrase = phrases.phraseList.First.Next.Value.ToString();
            Assert.AreEqual("abc(1.0)((10,15))", secondPhrase);
        }
        /// <summary>
        /// Term query "aaa" against the index built by MakeUnstoredIndex:
        /// SimpleFragmentsBuilder.CreateFragment must return null for document 0 of field F.
        /// </summary>
        public void TestUnstoredField()
        {
            MakeUnstoredIndex();
            FieldQuery termQuery = new FieldQuery(Tq("aaa"), true, true);
            FieldTermStack termStack = new FieldTermStack(reader, 0, F, termQuery);
            FieldPhraseList phrases = new FieldPhraseList(termStack, termQuery);

            // Frag list first, then the fragments builder that consumes it.
            FieldFragList fragList = new SimpleFragListBuilder().CreateFieldFragList(phrases, 100);
            SimpleFragmentsBuilder fragmentsBuilder = new SimpleFragmentsBuilder();

            Assert.IsNull(fragmentsBuilder.CreateFragment(reader, 0, F, fragList));
        }
예제 #17
0
        /// <summary>
        /// Query built by pqF(2F, 1, "a", "c") against the text "c a a b c": the phrase list
        /// must contain the single phrase "ac(2.0)((4,5)(8,9))", whose start offset is 4 and
        /// end offset is 9.
        /// </summary>
        public void TestPhraseSlop()
        {
            Make1d1fIndex("c a a b c");
            FieldQuery sloppyQuery = new FieldQuery(pqF(2F, 1, "a", "c"), true, true);
            FieldTermStack termStack = new FieldTermStack(reader, 0, F, sloppyQuery);
            FieldPhraseList phrases = new FieldPhraseList(termStack, sloppyQuery);

            Assert.AreEqual(1, phrases.phraseList.Count);

            string onlyPhrase = phrases.phraseList.First.Value.ToString();
            Assert.AreEqual("ac(2.0)((4,5)(8,9))", onlyPhrase);

            // Offsets reported by the phrase itself.
            Assert.AreEqual(4, phrases.phraseList.First.Value.GetStartOffset());
            Assert.AreEqual(9, phrases.phraseList.First.Value.GetEndOffset());
        }
 /// <summary>
 /// Builds the term stack and phrase list for one document field and turns them into a
 /// frag list using fragListBuilder. The phrase list is bounded by phraseLimit.
 /// </summary>
 /// <param name="fieldQuery">query passed to both FieldTermStack and FieldPhraseList</param>
 /// <param name="reader">index reader passed to FieldTermStack</param>
 /// <param name="docId">document id passed to FieldTermStack</param>
 /// <param name="fieldName">field name passed to FieldTermStack</param>
 /// <param name="fragCharSize">value forwarded to CreateFieldFragList</param>
 /// <returns>the FieldFragList created by fragListBuilder</returns>
 private FieldFragList GetFieldFragList(FieldQuery fieldQuery, IndexReader reader, int docId,
     String fieldName, int fragCharSize)
 {
     FieldPhraseList phrases = new FieldPhraseList(
         new FieldTermStack(reader, docId, fieldName, fieldQuery),
         fieldQuery,
         phraseLimit);

     return fragListBuilder.CreateFieldFragList(phrases, fragCharSize);
 }
예제 #19
0
        /// <summary>
        /// Phrase query ("ba", "ac") against the text "bbbacbabc" indexed through
        /// Make1d1fIndexB: the phrase list must contain the single phrase "baac(1.0)((2,5))".
        /// </summary>
        public void Test1PhraseIndexB()
        {
            // 01 12 23 34 45 56 67 78 (offsets)
            // bb|bb|ba|ac|cb|ba|ab|bc
            //  0  1  2  3  4  5  6  7 (positions)
            Make1d1fIndexB("bbbacbabc");
            FieldQuery phraseQuery = new FieldQuery(PqF("ba", "ac"), true, true);
            FieldTermStack termStack = new FieldTermStack(reader, 0, F, phraseQuery);
            FieldPhraseList phrases = new FieldPhraseList(termStack, phraseQuery);

            Assert.AreEqual(1, phrases.phraseList.Count);

            string onlyPhrase = phrases.phraseList.First.Value.ToString();
            Assert.AreEqual("baac(1.0)((2,5))", onlyPhrase);
        }
예제 #20
0
        /// <summary>
        /// Phrase query ("sp", "pe", "ee", "ed") against the index built by MakeIndexLongMVB:
        /// the term stack must hold four entries, popped as "sp(88,90,61)", "pe(89,91,62)",
        /// "ee(90,92,63)" and "ed(91,93,64)".
        /// </summary>
        public void Test1PhraseMVB()
        {
            MakeIndexLongMVB();

            // "speed" -(2gram)-> "sp","pe","ee","ed"
            FieldQuery bigramQuery = new FieldQuery(PqF("sp", "pe", "ee", "ed"), true, true);
            FieldTermStack termStack = new FieldTermStack(reader, 0, F, bigramQuery);

            Assert.AreEqual(4, termStack.termList.Count);

            // Entries are checked in pop order.
            string[] expectedTerms = { "sp(88,90,61)", "pe(89,91,62)", "ee(90,92,63)", "ed(91,93,64)" };
            foreach (string expectedTerm in expectedTerms)
            {
                Assert.AreEqual(expectedTerm, termStack.Pop().ToString());
            }
        }
예제 #21
0
        /// <summary>
        /// Phrase query ("search", "engines") against the index built by MakeIndexLongMV: the
        /// term stack must hold four entries, popped in the order listed in the expected
        /// array below.
        /// </summary>
        public void Test1PhraseLongMV()
        {
            MakeIndexLongMV();
            FieldQuery phraseQuery = new FieldQuery(PqF("search", "engines"), true, true);
            FieldTermStack termStack = new FieldTermStack(reader, 0, F, phraseQuery);

            Assert.AreEqual(4, termStack.termList.Count);

            // Entries are checked in pop order.
            string[] expectedTerms =
            {
                "search(102,108,14)",
                "engines(109,116,15)",
                "search(157,163,24)",
                "engines(164,171,25)"
            };
            foreach (string expectedTerm in expectedTerms)
            {
                Assert.AreEqual(expectedTerm, termStack.Pop().ToString());
            }
        }
예제 #22
0
 /// <summary>
 /// Creates a FieldPhraseList that has no practical limit on the number of phrases to
 /// analyze: it delegates to the three-argument constructor with a phrase limit of
 /// Int32.MaxValue.
 /// </summary>
 /// <param name="fieldTermStack">FieldTermStack object</param>
 /// <param name="fieldQuery">FieldQuery object</param>
 public FieldPhraseList(FieldTermStack fieldTermStack, FieldQuery fieldQuery)
     : this(fieldTermStack, fieldQuery, Int32.MaxValue)
 {
 }
        /// <summary>
        /// Phrase query ("search", "engines") against the index built by MakeIndexLongMV: the
        /// fragment created for document 0 of field F, with 100 passed to CreateFieldFragList,
        /// must wrap both occurrences of "search engines" in &lt;b&gt; tags.
        /// </summary>
        public void Test1PhraseLongMV()
        {
            MakeIndexLongMV();
            FieldQuery phraseQuery = new FieldQuery(PqF("search", "engines"), true, true);
            FieldTermStack termStack = new FieldTermStack(reader, 0, F, phraseQuery);
            FieldPhraseList phrases = new FieldPhraseList(termStack, phraseQuery);

            // Frag list first, then the fragments builder that consumes it.
            FieldFragList fragList = new SimpleFragListBuilder().CreateFieldFragList(phrases, 100);
            SimpleFragmentsBuilder fragmentsBuilder = new SimpleFragmentsBuilder();

            string expectedFragment = " most <b>search engines</b> use only one of these methods. Even the <b>search engines</b> that says they can use t";
            Assert.AreEqual(expectedFragment, fragmentsBuilder.CreateFragment(reader, 0, F, fragList));
        }
        /// <summary>
        /// Term query "d" against the index built by MakeIndexShortMV: the fragment created
        /// for document 0 of field F must be "a b c &lt;b&gt;d&lt;/b&gt; e".
        /// </summary>
        public void Test1PhraseShortMV()
        {
            MakeIndexShortMV();
            FieldQuery termQuery = new FieldQuery(Tq("d"), true, true);
            FieldTermStack termStack = new FieldTermStack(reader, 0, F, termQuery);
            FieldPhraseList phrases = new FieldPhraseList(termStack, termQuery);

            // Frag list first, then the fragments builder that consumes it.
            FieldFragList fragList = new SimpleFragListBuilder().CreateFieldFragList(phrases, 100);
            SimpleFragmentsBuilder fragmentsBuilder = new SimpleFragmentsBuilder();

            string expectedFragment = "a b c <b>d</b> e";
            Assert.AreEqual(expectedFragment, fragmentsBuilder.CreateFragment(reader, 0, F, fragList));
        }
예제 #25
0
        /// <summary>
        /// Term query "d" against the index built by MakeIndexShortMV: the term stack must
        /// hold exactly one entry, rendered as "d(6,7,3)".
        /// </summary>
        public void Test1PhraseShortMV()
        {
            MakeIndexShortMV();
            FieldQuery termQuery = new FieldQuery(Tq("d"), true, true);
            FieldTermStack termStack = new FieldTermStack(reader, 0, F, termQuery);

            Assert.AreEqual(1, termStack.termList.Count);

            string onlyTerm = termStack.Pop().ToString();
            Assert.AreEqual("d(6,7,3)", onlyTerm);
        }
예제 #26
0
        /// <summary>
        /// Term query "ab" against the index built by makeIndexB: the term stack must hold
        /// two entries, popped as "ab(2,4,2)" and then "ab(6,8,6)".
        /// </summary>
        public void Test1TermB()
        {
            makeIndexB();
            FieldQuery termQuery = new FieldQuery(Tq("ab"), true, true);
            FieldTermStack termStack = new FieldTermStack(reader, 0, F, termQuery);

            Assert.AreEqual(2, termStack.termList.Count);

            // Entries are checked in pop order.
            string[] expectedTerms = { "ab(2,4,2)", "ab(6,8,6)" };
            foreach (string expectedTerm in expectedTerms)
            {
                Assert.AreEqual(expectedTerm, termStack.Pop().ToString());
            }
        }
예제 #27
0
        /// <summary>
        /// Boolean SHOULD query over the phrases ("a", "b") and ("b", "c") against the text
        /// "d a b c d": the phrase list must contain the single phrase "abc(1.0)((2,7))".
        /// </summary>
        public void Test2PhrasesOverlap()
        {
            Make1d1fIndex("d a b c d");

            BooleanQuery eitherPhrase = new BooleanQuery();
            eitherPhrase.Add(PqF("a", "b"), Occur.SHOULD);
            eitherPhrase.Add(PqF("b", "c"), Occur.SHOULD);

            FieldQuery fieldQuery = new FieldQuery(eitherPhrase, true, true);
            FieldTermStack termStack = new FieldTermStack(reader, 0, F, fieldQuery);
            FieldPhraseList phrases = new FieldPhraseList(termStack, fieldQuery);

            Assert.AreEqual(1, phrases.phraseList.Count);

            string onlyPhrase = phrases.phraseList.First.Value.ToString();
            Assert.AreEqual("abc(1.0)((2,7))", onlyPhrase);
        }
        /// <summary>
        /// Phrase query ("sp", "pe", "ee", "ed") against the index built by MakeIndexLongMVB:
        /// the fragment created for document 0 of field F must be
        /// "ssing &lt;b&gt;speed&lt;/b&gt;, the".
        /// </summary>
        public void Test1PhraseLongMVB()
        {
            MakeIndexLongMVB();

            // "speed" -(2gram)-> "sp","pe","ee","ed"
            FieldQuery bigramQuery = new FieldQuery(PqF("sp", "pe", "ee", "ed"), true, true);
            FieldTermStack termStack = new FieldTermStack(reader, 0, F, bigramQuery);
            FieldPhraseList phrases = new FieldPhraseList(termStack, bigramQuery);

            // Frag list first, then the fragments builder that consumes it.
            FieldFragList fragList = new SimpleFragListBuilder().CreateFieldFragList(phrases, 100);
            SimpleFragmentsBuilder fragmentsBuilder = new SimpleFragmentsBuilder();

            string expectedFragment = "ssing <b>speed</b>, the";
            Assert.AreEqual(expectedFragment, fragmentsBuilder.CreateFragment(reader, 0, F, fragList));
        }
예제 #29
0
        /// <summary>
        /// Term query "ab" against the text "abab" indexed through Make1d1fIndexB: the phrase
        /// list must contain two entries, "ab(1.0)((0,2))" followed by "ab(1.0)((2,4))".
        /// </summary>
        public void Test2ConcatTermsIndexB()
        {
            // 01 12 23 (offsets)
            // ab|ba|ab
            //  0  1  2 (positions)
            Make1d1fIndexB("abab");
            FieldQuery termQuery = new FieldQuery(Tq("ab"), true, true);
            FieldTermStack termStack = new FieldTermStack(reader, 0, F, termQuery);
            FieldPhraseList phrases = new FieldPhraseList(termStack, termQuery);

            Assert.AreEqual(2, phrases.phraseList.Count);

            string firstPhrase = phrases.phraseList.First.Value.ToString();
            Assert.AreEqual("ab(1.0)((0,2))", firstPhrase);

            string secondPhrase = phrases.phraseList.First.Next.Value.ToString();
            Assert.AreEqual("ab(1.0)((2,4))", secondPhrase);
        }
예제 #30
0
        /// <summary>
        /// Phrase query ("a", "b") against the text "c a a b", run twice with a different
        /// second FieldQuery constructor argument. With true the phrase list holds only
        /// "ab(1.0)((4,7))"; with false it holds "a(1.0)((2,3))" followed by "ab(1.0)((4,7))".
        /// </summary>
        public void Test2Terms1PhraseIndex()
        {
            Make1d1fIndex("c a a b");

            // phraseHighlight = true
            FieldQuery strictQuery = new FieldQuery(PqF("a", "b"), true, true);
            FieldTermStack strictStack = new FieldTermStack(reader, 0, F, strictQuery);
            FieldPhraseList strictPhrases = new FieldPhraseList(strictStack, strictQuery);
            Assert.AreEqual(1, strictPhrases.phraseList.Count);
            string strictOnly = strictPhrases.phraseList.First.Value.ToString();
            Assert.AreEqual("ab(1.0)((4,7))", strictOnly);

            // phraseHighlight = false
            FieldQuery looseQuery = new FieldQuery(PqF("a", "b"), false, true);
            FieldTermStack looseStack = new FieldTermStack(reader, 0, F, looseQuery);
            FieldPhraseList loosePhrases = new FieldPhraseList(looseStack, looseQuery);
            Assert.AreEqual(2, loosePhrases.phraseList.Count);
            string looseFirst = loosePhrases.phraseList.First.Value.ToString();
            Assert.AreEqual("a(1.0)((2,3))", looseFirst);
            string looseSecond = loosePhrases.phraseList.First.Next.Value.ToString();
            Assert.AreEqual("ab(1.0)((4,7))", looseSecond);
        }