Пример #1
0
        /// <summary>
        /// Walks the phrases of <paramref name="fieldPhraseList"/> once, in order, and groups them
        /// into fragments. Each fragment spans <paramref name="fragCharSize"/> characters, or more
        /// when the phrase that opens it ends beyond that width.
        /// </summary>
        /// <param name="fieldPhraseList">Phrase list whose <c>phraseList</c> is enumerated.</param>
        /// <param name="fragCharSize">Fragment width in characters; must be at least <c>MIN_FRAG_CHAR_SIZE</c>.</param>
        /// <returns>A new <see cref="FieldFragList"/> with one entry added per fragment.</returns>
        /// <exception cref="ArgumentException">
        /// Thrown when <paramref name="fragCharSize"/> is smaller than <c>MIN_FRAG_CHAR_SIZE</c>.
        /// </exception>
        public FieldFragList CreateFieldFragList(FieldPhraseList fieldPhraseList, int fragCharSize)
        {
            if (fragCharSize < MIN_FRAG_CHAR_SIZE)
            {
                throw new ArgumentException("fragCharSize(" + fragCharSize + ") is too small. It must be " +
                    MIN_FRAG_CHAR_SIZE + " or higher.");
            }

            FieldFragList ffl = new FieldFragList(fragCharSize);

            List<WeightedPhraseInfo> wpil = new List<WeightedPhraseInfo>();
            LinkedList<WeightedPhraseInfo>.Enumerator ite = fieldPhraseList.phraseList.GetEnumerator();

            WeightedPhraseInfo phraseInfo = null;
            int startOffset = 0;

            // True only while phraseInfo holds a phrase that was fetched but not yet placed in a
            // fragment, so the outer loop must not advance the enumerator before handling it.
            bool taken = false;
            while (true)
            {
                if (!taken)
                {
                    if (!ite.MoveNext())
                    {
                        break;
                    }
                    phraseInfo = ite.Current;
                }
                taken = false;
                if (phraseInfo == null)
                {
                    break;
                }

                // A phrase that starts before the previous fragment's end is dropped when it also
                // ends before that border; otherwise the border is pulled back to the phrase start.
                if (phraseInfo.StartOffset < startOffset)
                {
                    if (phraseInfo.EndOffset < startOffset)
                    {
                        continue;
                    }
                    startOffset = phraseInfo.StartOffset;
                }

                wpil.Clear();
                wpil.Add(phraseInfo);

                // Begin MARGIN characters ahead of the phrase, but never before the current border.
                int st = phraseInfo.StartOffset - MARGIN < startOffset ?
                    startOffset : phraseInfo.StartOffset - MARGIN;
                int en = st + fragCharSize;
                if (phraseInfo.EndOffset > en)
                {
                    en = phraseInfo.EndOffset;
                }
                startOffset = en;

                // Pull in every following phrase that still ends inside this fragment.
                while (ite.MoveNext())
                {
                    phraseInfo = ite.Current;
                    if (phraseInfo == null || phraseInfo.EndOffset > en)
                    {
                        // Not placed: hand it to the outer loop instead of fetching a new one.
                        taken = true;
                        break;
                    }

                    // Placed phrases must not stay pending. Previously the flag remained set, so a
                    // final phrase ending exactly at 'en' was emitted again as an extra fragment.
                    wpil.Add(phraseInfo);
                }
                ffl.Add(st, en, wpil);
            }
            return ffl;
        }
Пример #2
0
        /// <summary>
        /// Builds the term stack and the phrase list for one field of one document, then asks
        /// <c>fragListBuilder</c> to turn the phrase list into a fragment list.
        /// </summary>
        private FieldFragList GetFieldFragList(FieldQuery fieldQuery, IndexReader reader, int docId,
                                               String fieldName, int fragCharSize, IState state)
        {
            var termStack = new FieldTermStack(reader, docId, fieldName, fieldQuery, state);
            var phrases = new FieldPhraseList(termStack, fieldQuery, phraseLimit);

            return fragListBuilder.CreateFieldFragList(phrases, fragCharSize);
        }
 /// <summary>
 /// Indexes <paramref name="indexValue"/> via <c>Make1d1fIndex</c>, parses
 /// <paramref name="queryValue"/> with <c>paW</c>, and returns the fragment list that a
 /// <c>SimpleFragListBuilder</c> produces for document 0 with a fragment size of 20.
 /// </summary>
 private FieldFragList Ffl(String queryValue, String indexValue)
 {
     Make1d1fIndex(indexValue);

     var fieldQuery = new FieldQuery(paW.Parse(queryValue), true, true);
     var termStack = new FieldTermStack(reader, 0, F, fieldQuery);
     var phrases = new FieldPhraseList(termStack, fieldQuery);

     return new SimpleFragListBuilder().CreateFieldFragList(phrases, 20);
 }
Пример #4
0
        /// <summary>
        /// Test helper: builds the index from <paramref name="indexValue"/>, parses
        /// <paramref name="queryValue"/>, and creates a fragment list (fragment size 20) for
        /// document 0 using a fresh <c>SimpleFragListBuilder</c>.
        /// </summary>
        private FieldFragList Ffl(String queryValue, String indexValue)
        {
            Make1d1fIndex(indexValue);

            Query parsed = paW.Parse(queryValue);
            var fieldQuery = new FieldQuery(parsed, true, true);
            var phrases = new FieldPhraseList(new FieldTermStack(reader, 0, F, fieldQuery), fieldQuery);
            var builder = new SimpleFragListBuilder();

            return builder.CreateFieldFragList(phrases, 20);
        }
Пример #5
0
        /// <summary>
        /// Indexes via <c>MakeIndexShortMV</c>, searches with <c>Tq("d")</c>, and expects exactly
        /// one phrase whose string form is <c>d(1.0)((6,7))</c>.
        /// </summary>
        public void Test1PhraseShortMV()
        {
            MakeIndexShortMV();

            var fieldQuery = new FieldQuery(Tq("d"), true, true);
            var termStack = new FieldTermStack(reader, 0, F, fieldQuery);
            var phrases = new FieldPhraseList(termStack, fieldQuery).phraseList;

            Assert.AreEqual(1, phrases.Count);
            Assert.AreEqual("d(1.0)((6,7))", phrases.First.Value.ToString());
        }
Пример #6
0
        /// <summary>
        /// Indexes via <c>MakeIndexLongMVB</c> and searches for the 2-grams of "speed"
        /// ("sp", "pe", "ee", "ed"); expects a single phrase at offsets (88,93).
        /// </summary>
        public void Test1PhraseLongMVB()
        {
            MakeIndexLongMVB();

            // "speed" -(2gram)-> "sp","pe","ee","ed"
            var fieldQuery = new FieldQuery(PqF("sp", "pe", "ee", "ed"), true, true);
            var termStack = new FieldTermStack(reader, 0, F, fieldQuery);
            var phrases = new FieldPhraseList(termStack, fieldQuery).phraseList;

            Assert.AreEqual(1, phrases.Count);
            Assert.AreEqual("sppeeeed(1.0)((88,93))", phrases.First.Value.ToString());
        }
Пример #7
0
        /// <summary>
        /// Indexes "a a", searches with <c>Tq("a")</c>, and expects two phrases at offsets
        /// (0,1) and (2,3), in that order.
        /// </summary>
        public void Test2TermsIndex()
        {
            Make1d1fIndex("a a");

            var fieldQuery = new FieldQuery(Tq("a"), true, true);
            var termStack = new FieldTermStack(reader, 0, F, fieldQuery);
            var phrases = new FieldPhraseList(termStack, fieldQuery).phraseList;

            Assert.AreEqual(2, phrases.Count);

            var head = phrases.First;
            Assert.AreEqual("a(1.0)((0,1))", head.Value.ToString());
            Assert.AreEqual("a(1.0)((2,3))", head.Next.Value.ToString());
        }
Пример #8
0
        /// <summary>
        /// Indexes "d a b a b c d", searches with <c>PqF("a", "b", "c")</c>, and expects one
        /// phrase whose string form is <c>abc(1.0)((6,11))</c>.
        /// </summary>
        public void Test3TermsPhrase()
        {
            Make1d1fIndex("d a b a b c d");

            var fieldQuery = new FieldQuery(PqF("a", "b", "c"), true, true);
            var termStack = new FieldTermStack(reader, 0, F, fieldQuery);
            var phrases = new FieldPhraseList(termStack, fieldQuery).phraseList;

            Assert.AreEqual(1, phrases.Count);
            Assert.AreEqual("abc(1.0)((6,11))", phrases.First.Value.ToString());
        }
Пример #9
0
        /// <summary>
        /// Indexes via <c>MakeIndexLongMV</c>, searches with <c>PqF("search", "engines")</c>, and
        /// expects two phrases at offsets (102,116) and (157,171).
        /// </summary>
        public void Test1PhraseLongMV()
        {
            MakeIndexLongMV();

            var fieldQuery = new FieldQuery(PqF("search", "engines"), true, true);
            var termStack = new FieldTermStack(reader, 0, F, fieldQuery);
            var phrases = new FieldPhraseList(termStack, fieldQuery).phraseList;

            Assert.AreEqual(2, phrases.Count);

            var head = phrases.First;
            Assert.AreEqual("searchengines(1.0)((102,116))", head.Value.ToString());
            Assert.AreEqual("searchengines(1.0)((157,171))", head.Next.Value.ToString());
        }
        /// <summary>
        /// Indexes via <c>MakeIndexLongMV</c>, searches with <c>Preq("engin")</c>, and expects two
        /// "engines" phrases at offsets (109,116) and (164,171).
        /// </summary>
        public void TestCompleteSearchInLongMV()
        {
            MakeIndexLongMV();

            var fieldQuery = new FieldQuery(Preq("engin"), true, true);
            var termStack = new FieldTermStack(reader, 0, F, fieldQuery);
            var phrases = new FieldPhraseList(termStack, fieldQuery).phraseList;

            Assert.AreEqual(2, phrases.Count);

            var head = phrases.First;
            Assert.AreEqual("engines(1.0)((109,116))", head.Value.ToString());
            Assert.AreEqual("engines(1.0)((164,171))", head.Next.Value.ToString());
        }
Пример #11
0
        /// <summary>
        /// With "a a" indexed, a <c>Tq("a")</c> search must produce two phrase entries:
        /// <c>a(1.0)((0,1))</c> followed by <c>a(1.0)((2,3))</c>.
        /// </summary>
        public void Test2TermsIndex()
        {
            Make1d1fIndex("a a");

            var query = new FieldQuery(Tq("a"), true, true);
            var phraseList = new FieldPhraseList(new FieldTermStack(reader, 0, F, query), query);

            Assert.AreEqual(2, phraseList.phraseList.Count);

            var firstNode = phraseList.phraseList.First;
            Assert.AreEqual("a(1.0)((0,1))", firstNode.Value.ToString());
            Assert.AreEqual("a(1.0)((2,3))", firstNode.Next.Value.ToString());
        }
Пример #12
0
        /// <summary>
        /// Indexes "c a a b c", searches with <c>pqF(2F, 1, "a", "c")</c>, and expects one phrase
        /// <c>ac(2.0)((4,5)(8,9))</c> whose start offset is 4 and end offset is 9.
        /// </summary>
        public void TestPhraseSlop()
        {
            Make1d1fIndex("c a a b c");

            var fieldQuery = new FieldQuery(pqF(2F, 1, "a", "c"), true, true);
            var termStack = new FieldTermStack(reader, 0, F, fieldQuery);
            var phrases = new FieldPhraseList(termStack, fieldQuery).phraseList;

            Assert.AreEqual(1, phrases.Count);

            var info = phrases.First.Value;
            Assert.AreEqual("ac(2.0)((4,5)(8,9))", info.ToString());
            Assert.AreEqual(4, info.StartOffset);
            Assert.AreEqual(9, info.EndOffset);
        }
Пример #13
0
        /// <summary>
        /// Indexes via <c>MakeIndexLongMV</c>, searches with <c>PqF("search", "engines")</c>, and
        /// expects <c>SimpleFragListBuilder</c> (fragment size 100) to yield a single fragment
        /// covering both phrase hits.
        /// </summary>
        public void Test1PhraseLongMV()
        {
            MakeIndexLongMV();

            var fieldQuery = new FieldQuery(PqF("search", "engines"), true, true);
            var termStack = new FieldTermStack(reader, 0, F, fieldQuery);
            var phrases = new FieldPhraseList(termStack, fieldQuery);
            var fragList = new SimpleFragListBuilder().CreateFieldFragList(phrases, 100);

            Assert.AreEqual(1, fragList.fragInfos.Count);
            Assert.AreEqual("subInfos=(searchengines((102,116))searchengines((157,171)))/2.0(96,196)", fragList.fragInfos[0].ToString());
        }
Пример #14
0
        /// <summary>
        /// Indexes via <c>MakeIndexShortMV</c>, searches with <c>Tq("d")</c>, and expects
        /// <c>SimpleFragmentsBuilder.CreateFragment</c> to return <c>a b c &lt;b&gt;d&lt;/b&gt; e</c>.
        /// </summary>
        public void Test1PhraseShortMV()
        {
            MakeIndexShortMV();

            var fieldQuery = new FieldQuery(Tq("d"), true, true);
            var termStack = new FieldTermStack(reader, 0, F, fieldQuery);
            var phrases = new FieldPhraseList(termStack, fieldQuery);
            var fragList = new SimpleFragListBuilder().CreateFieldFragList(phrases, 100);
            var fragmentsBuilder = new SimpleFragmentsBuilder();

            var fragment = fragmentsBuilder.CreateFragment(reader, 0, F, fragList);

            Assert.AreEqual("a b c <b>d</b> e", fragment);
        }
Пример #15
0
        /// <summary>
        /// Indexes via <c>MakeIndexLongMVB</c>, searches for the 2-grams of "speed", and expects
        /// the created fragment to be <c>ssing &lt;b&gt;speed&lt;/b&gt;, the</c>.
        /// </summary>
        public void Test1PhraseLongMVB()
        {
            MakeIndexLongMVB();

            // "speed" -(2gram)-> "sp","pe","ee","ed"
            var fieldQuery = new FieldQuery(PqF("sp", "pe", "ee", "ed"), true, true);
            var termStack = new FieldTermStack(reader, 0, F, fieldQuery);
            var phrases = new FieldPhraseList(termStack, fieldQuery);
            var fragList = new SimpleFragListBuilder().CreateFieldFragList(phrases, 100);
            var fragmentsBuilder = new SimpleFragmentsBuilder();

            var fragment = fragmentsBuilder.CreateFragment(reader, 0, F, fragList);

            Assert.AreEqual("ssing <b>speed</b>, the", fragment);
        }
Пример #16
0
        /// <summary>
        /// Indexes via <c>MakeUnstoredIndex</c>, searches with <c>Tq("aaa")</c>, and expects
        /// <c>SimpleFragmentsBuilder.CreateFragment</c> to return null.
        /// </summary>
        public void TestUnstoredField()
        {
            MakeUnstoredIndex();

            var fieldQuery = new FieldQuery(Tq("aaa"), true, true);
            var termStack = new FieldTermStack(reader, 0, F, fieldQuery);
            var phrases = new FieldPhraseList(termStack, fieldQuery);
            var fragList = new SimpleFragListBuilder().CreateFieldFragList(phrases, 100);
            var fragmentsBuilder = new SimpleFragmentsBuilder();

            var fragment = fragmentsBuilder.CreateFragment(reader, 0, F, fragList);

            Assert.IsNull(fragment);
        }
Пример #17
0
        /// <summary>
        /// Indexes via <c>MakeIndexLongMVB</c>, searches for the 2-grams of "speed", and expects
        /// <c>SimpleFragListBuilder</c> (fragment size 100) to yield one fragment spanning (82,182).
        /// </summary>
        public void Test1PhraseLongMVB()
        {
            MakeIndexLongMVB();

            // "speed" -(2gram)-> "sp","pe","ee","ed"
            var fieldQuery = new FieldQuery(PqF("sp", "pe", "ee", "ed"), true, true);
            var termStack = new FieldTermStack(reader, 0, F, fieldQuery);
            var phrases = new FieldPhraseList(termStack, fieldQuery);
            var fragList = new SimpleFragListBuilder().CreateFieldFragList(phrases, 100);

            Assert.AreEqual(1, fragList.fragInfos.Count);
            Assert.AreEqual("subInfos=(sppeeeed((88,93)))/1.0(82,182)", fragList.fragInfos[0].ToString());
        }
Пример #18
0
        /// <summary>
        /// Indexes via <c>MakeIndex1w2w</c>, searches with <c>PqF("personal", "computer")</c>, and
        /// expects one phrase spanning offsets 3 to 5.
        /// </summary>
        public void TestFieldPhraseListIndex1w2wSearch1phrase()
        {
            MakeIndex1w2w();

            var fieldQuery = new FieldQuery(PqF("personal", "computer"), true, true);
            var termStack = new FieldTermStack(reader, 0, F, fieldQuery);
            var phrases = new FieldPhraseList(termStack, fieldQuery).phraseList;

            Assert.AreEqual(1, phrases.Count);

            var info = phrases.First.Value;
            Assert.AreEqual("personalcomputer(1.0)((3,5))", info.ToString());
            Assert.AreEqual(3, info.StartOffset);
            Assert.AreEqual(5, info.EndOffset);
        }
Пример #19
0
        /// <summary>
        /// Indexes via <c>MakeIndex2w1w</c>, searches with <c>Tq("computer")</c>, and expects one
        /// phrase spanning offsets 3 to 20.
        /// </summary>
        public void TestFieldPhraseListIndex2w1wSearch1partial()
        {
            MakeIndex2w1w();

            var fieldQuery = new FieldQuery(Tq("computer"), true, true);
            var termStack = new FieldTermStack(reader, 0, F, fieldQuery);
            var phrases = new FieldPhraseList(termStack, fieldQuery).phraseList;

            Assert.AreEqual(1, phrases.Count);

            var info = phrases.First.Value;
            Assert.AreEqual("computer(1.0)((3,20))", info.ToString());
            Assert.AreEqual(3, info.StartOffset);
            Assert.AreEqual(20, info.EndOffset);
        }
Пример #20
0
        /// <summary>
        /// Indexes via <c>MakeIndexShortMV</c>, searches with <c>Tq("d")</c>, and expects
        /// <c>SimpleFragListBuilder</c> (fragment size 100) to yield one fragment spanning (0,100).
        /// </summary>
        public void Test1PhraseShortMV()
        {
            MakeIndexShortMV();

            var fieldQuery = new FieldQuery(Tq("d"), true, true);
            var termStack = new FieldTermStack(reader, 0, F, fieldQuery);
            var phrases = new FieldPhraseList(termStack, fieldQuery);
            var fragList = new SimpleFragListBuilder().CreateFieldFragList(phrases, 100);

            Assert.AreEqual(1, fragList.fragInfos.Count);
            Assert.AreEqual("subInfos=(d((6,7)))/1.0(0,100)", fragList.fragInfos[0].ToString());
        }
Пример #21
0
        /// <summary>
        /// Indexes via <c>MakeIndexLongMV</c>, searches with <c>PqF("search", "engines")</c>, and
        /// checks the exact highlighted fragment returned by <c>SimpleFragmentsBuilder</c>.
        /// </summary>
        public void Test1PhraseLongMV()
        {
            MakeIndexLongMV();

            var fieldQuery = new FieldQuery(PqF("search", "engines"), true, true);
            var termStack = new FieldTermStack(reader, 0, F, fieldQuery);
            var phrases = new FieldPhraseList(termStack, fieldQuery);
            var fragList = new SimpleFragListBuilder().CreateFieldFragList(phrases, 100);
            var fragmentsBuilder = new SimpleFragmentsBuilder();

            var fragment = fragmentsBuilder.CreateFragment(reader, 0, F, fragList);

            Assert.AreEqual(" most <b>search engines</b> use only one of these methods. Even the <b>search engines</b> that says they can use t",
                            fragment);
        }
Пример #22
0
        /// <summary>
        /// Indexes "bbbacbabc" via <c>Make1d1fIndexB</c>, searches with <c>PqF("ba", "ac")</c>,
        /// and expects one phrase whose string form is <c>baac(1.0)((2,5))</c>.
        /// </summary>
        public void Test1PhraseIndexB()
        {
            // 01 12 23 34 45 56 67 78 (offsets)
            // bb|bb|ba|ac|cb|ba|ab|bc
            //  0  1  2  3  4  5  6  7 (positions)
            Make1d1fIndexB("bbbacbabc");

            var fieldQuery = new FieldQuery(PqF("ba", "ac"), true, true);
            var termStack = new FieldTermStack(reader, 0, F, fieldQuery);
            var phrases = new FieldPhraseList(termStack, fieldQuery).phraseList;

            Assert.AreEqual(1, phrases.Count);
            Assert.AreEqual("baac(1.0)((2,5))", phrases.First.Value.ToString());
        }
Пример #23
0
        /// <summary>
        /// Indexes "abab" via <c>Make1d1fIndexB</c>, searches with <c>Tq("ab")</c>, and expects
        /// two phrases at offsets (0,2) and (2,4).
        /// </summary>
        public void Test2ConcatTermsIndexB()
        {
            // 01 12 23 (offsets)
            // ab|ba|ab
            //  0  1  2 (positions)
            Make1d1fIndexB("abab");

            var fieldQuery = new FieldQuery(Tq("ab"), true, true);
            var termStack = new FieldTermStack(reader, 0, F, fieldQuery);
            var phrases = new FieldPhraseList(termStack, fieldQuery).phraseList;

            Assert.AreEqual(2, phrases.Count);

            var head = phrases.First;
            Assert.AreEqual("ab(1.0)((0,2))", head.Value.ToString());
            Assert.AreEqual("ab(1.0)((2,4))", head.Next.Value.ToString());
        }
Пример #24
0
        /// <summary>
        /// Indexes "d a b c d" and searches for <c>PqF("a", "b")</c> OR <c>PqF("b", "c")</c>;
        /// expects the two overlapping hits to be reported as one phrase <c>abc(1.0)((2,7))</c>.
        /// </summary>
        public void Test2PhrasesOverlap()
        {
            Make1d1fIndex("d a b c d");

            var either = new BooleanQuery();
            either.Add(PqF("a", "b"), Occur.SHOULD);
            either.Add(PqF("b", "c"), Occur.SHOULD);

            var fieldQuery = new FieldQuery(either, true, true);
            var termStack = new FieldTermStack(reader, 0, F, fieldQuery);
            var phrases = new FieldPhraseList(termStack, fieldQuery).phraseList;

            Assert.AreEqual(1, phrases.Count);
            Assert.AreEqual("abc(1.0)((2,7))", phrases.First.Value.ToString());
        }
Пример #25
0
        /// <summary>
        /// Indexes "a b" and runs two searches against it: <c>PqF("a", "b")</c> must give one
        /// phrase at (0,3), and <c>Tq("b")</c> must give one phrase at (2,3).
        /// </summary>
        public void Test1PhraseIndex()
        {
            Make1d1fIndex("a b");

            // Phrase search.
            var phraseQuery = new FieldQuery(PqF("a", "b"), true, true);
            var phraseStack = new FieldTermStack(reader, 0, F, phraseQuery);
            var phraseHits = new FieldPhraseList(phraseStack, phraseQuery).phraseList;
            Assert.AreEqual(1, phraseHits.Count);
            Assert.AreEqual("ab(1.0)((0,3))", phraseHits.First.Value.ToString());

            // Single-term search on the same index.
            var termQuery = new FieldQuery(Tq("b"), true, true);
            var termStack = new FieldTermStack(reader, 0, F, termQuery);
            var termHits = new FieldPhraseList(termStack, termQuery).phraseList;
            Assert.AreEqual(1, termHits.Count);
            Assert.AreEqual("b(1.0)((2,3))", termHits.First.Value.ToString());
        }
Пример #26
0
        /// <summary>
        /// Indexes "d a b d c a b c" and searches for <c>PqF("a", "b")</c> OR
        /// <c>PqF("a", "b", "c")</c>; expects "ab" at (2,5) followed by "abc" at (10,15).
        /// </summary>
        public void TestSearchLongestPhrase()
        {
            Make1d1fIndex("d a b d c a b c");

            var either = new BooleanQuery();
            either.Add(PqF("a", "b"), Occur.SHOULD);
            either.Add(PqF("a", "b", "c"), Occur.SHOULD);

            var fieldQuery = new FieldQuery(either, true, true);
            var termStack = new FieldTermStack(reader, 0, F, fieldQuery);
            var phrases = new FieldPhraseList(termStack, fieldQuery).phraseList;

            Assert.AreEqual(2, phrases.Count);

            var head = phrases.First;
            Assert.AreEqual("ab(1.0)((2,5))", head.Value.ToString());
            Assert.AreEqual("abc(1.0)((10,15))", head.Next.Value.ToString());
        }
Пример #27
0
        /// <summary>
        /// With "a b" indexed, checks a phrase search (<c>PqF("a", "b")</c>, expected at (0,3))
        /// and then a term search (<c>Tq("b")</c>, expected at (2,3)); each must yield one phrase.
        /// </summary>
        public void Test1PhraseIndex()
        {
            Make1d1fIndex("a b");

            // Phrase search.
            var phraseQuery = new FieldQuery(PqF("a", "b"), true, true);
            var phraseHits = new FieldPhraseList(new FieldTermStack(reader, 0, F, phraseQuery), phraseQuery);

            Assert.AreEqual(1, phraseHits.phraseList.Count);
            Assert.AreEqual("ab(1.0)((0,3))", phraseHits.phraseList.First.Value.ToString());

            // Single-term search on the same index.
            var termQuery = new FieldQuery(Tq("b"), true, true);
            var termHits = new FieldPhraseList(new FieldTermStack(reader, 0, F, termQuery), termQuery);

            Assert.AreEqual(1, termHits.phraseList.Count);
            Assert.AreEqual("b(1.0)((2,3))", termHits.phraseList.First.Value.ToString());
        }
Пример #28
0
        /// <summary>
        /// Indexes via <c>MakeIndex2w1w</c> and searches for <c>Tq("pc")</c> OR
        /// <c>PqF("personal", "computer")</c>; expects one phrase spanning offsets 3 to 20 whose
        /// string form contains <c>(1.0)((3,20))</c> after some leading text.
        /// </summary>
        public void TestFieldPhraseListIndex2w1wSearch1term1phrase()
        {
            MakeIndex2w1w();

            BooleanQuery bq = new BooleanQuery();

            bq.Add(Tq("pc"), Occur.SHOULD);
            bq.Add(PqF("personal", "computer"), Occur.SHOULD);
            FieldQuery      fq    = new FieldQuery(bq, true, true);
            FieldTermStack  stack = new FieldTermStack(reader, 0, F, fq);
            FieldPhraseList fpl   = new FieldPhraseList(stack, fq);

            Assert.AreEqual(1, fpl.phraseList.Count);

            // Ordinal search: the one-argument IndexOf is culture-sensitive, which is not wanted
            // when matching a fixed machine-formatted token. The index must be > 0, i.e. the
            // token may not sit at the very start of the string.
            string text = fpl.phraseList.First.Value.ToString();
            Assert.IsTrue(text.IndexOf("(1.0)((3,20))", System.StringComparison.Ordinal) > 0);
            Assert.AreEqual(3, fpl.phraseList.First.Value.StartOffset);
            Assert.AreEqual(20, fpl.phraseList.First.Value.EndOffset);
        }
Пример #29
0
        /// <summary>
        /// Indexes "c a a b" and searches with <c>PqF("a", "b")</c> twice: with the second
        /// <c>FieldQuery</c> argument true (one phrase expected) and false (two phrases expected).
        /// </summary>
        public void Test2Terms1PhraseIndex()
        {
            Make1d1fIndex("c a a b");

            // phraseHighlight = true
            var strictQuery = new FieldQuery(PqF("a", "b"), true, true);
            var strictStack = new FieldTermStack(reader, 0, F, strictQuery);
            var strictHits = new FieldPhraseList(strictStack, strictQuery).phraseList;

            Assert.AreEqual(1, strictHits.Count);
            Assert.AreEqual("ab(1.0)((4,7))", strictHits.First.Value.ToString());

            // phraseHighlight = false
            var looseQuery = new FieldQuery(PqF("a", "b"), false, true);
            var looseStack = new FieldTermStack(reader, 0, F, looseQuery);
            var looseHits = new FieldPhraseList(looseStack, looseQuery).phraseList;

            Assert.AreEqual(2, looseHits.Count);

            var head = looseHits.First;
            Assert.AreEqual("a(1.0)((2,3))", head.Value.ToString());
            Assert.AreEqual("ab(1.0)((4,7))", head.Next.Value.ToString());
        }
Пример #30
0
        /// <summary>
        /// Indexes "c a a b c", searches with <c>pqF(2F, 1, "a", "c")</c>, and expects one phrase
        /// <c>ac(2.0)((4,5)(8,9))</c> reporting start offset 4 and end offset 9.
        /// </summary>
        public void TestPhraseSlop()
        {
            Make1d1fIndex("c a a b c");

            var fieldQuery = new FieldQuery(pqF(2F, 1, "a", "c"), true, true);
            var termStack = new FieldTermStack(reader, 0, F, fieldQuery);
            var phrases = new FieldPhraseList(termStack, fieldQuery).phraseList;

            Assert.AreEqual(1, phrases.Count);

            var info = phrases.First.Value;
            Assert.AreEqual("ac(2.0)((4,5)(8,9))", info.ToString());
            Assert.AreEqual(4, info.GetStartOffset());
            Assert.AreEqual(9, info.GetEndOffset());
        }
Пример #31
0
        /// <summary>
        /// With "d a b a b c d" indexed, a <c>PqF("a", "b", "c")</c> search must produce exactly
        /// one phrase, <c>abc(1.0)((6,11))</c>.
        /// </summary>
        public void Test3TermsPhrase()
        {
            Make1d1fIndex("d a b a b c d");

            var query = new FieldQuery(PqF("a", "b", "c"), true, true);
            var hits = new FieldPhraseList(new FieldTermStack(reader, 0, F, query), query);

            Assert.AreEqual(1, hits.phraseList.Count);
            Assert.AreEqual("abc(1.0)((6,11))", hits.phraseList.First.Value.ToString());
        }
Пример #32
0
        /// <summary>
        /// With "d a b c d" indexed, searching for <c>PqF("a", "b")</c> OR <c>PqF("b", "c")</c>
        /// must report a single phrase <c>abc(1.0)((2,7))</c>.
        /// </summary>
        public void Test2PhrasesOverlap()
        {
            Make1d1fIndex("d a b c d");

            var either = new BooleanQuery();
            either.Add(PqF("a", "b"), Occur.SHOULD);
            either.Add(PqF("b", "c"), Occur.SHOULD);

            var query = new FieldQuery(either, true, true);
            var hits = new FieldPhraseList(new FieldTermStack(reader, 0, F, query), query);

            Assert.AreEqual(1, hits.phraseList.Count);
            Assert.AreEqual("abc(1.0)((2,7))", hits.phraseList.First.Value.ToString());
        }
Пример #33
0
        /// <summary>
        /// With "c a a b" indexed, runs <c>PqF("a", "b")</c> once with the second
        /// <c>FieldQuery</c> argument true (one phrase expected) and once with it false (two
        /// phrases expected).
        /// </summary>
        public void Test2Terms1PhraseIndex()
        {
            Make1d1fIndex("c a a b");

            // phraseHighlight = true
            var strictQuery = new FieldQuery(PqF("a", "b"), true, true);
            var strictHits = new FieldPhraseList(new FieldTermStack(reader, 0, F, strictQuery), strictQuery);

            Assert.AreEqual(1, strictHits.phraseList.Count);
            Assert.AreEqual("ab(1.0)((4,7))", strictHits.phraseList.First.Value.ToString());

            // phraseHighlight = false
            var looseQuery = new FieldQuery(PqF("a", "b"), false, true);
            var looseHits = new FieldPhraseList(new FieldTermStack(reader, 0, F, looseQuery), looseQuery);

            Assert.AreEqual(2, looseHits.phraseList.Count);

            var head = looseHits.phraseList.First;
            Assert.AreEqual("a(1.0)((2,3))", head.Value.ToString());
            Assert.AreEqual("ab(1.0)((4,7))", head.Next.Value.ToString());
        }
Пример #34
0
        /// <summary>
        /// Enumerates the phrases of <paramref name="fieldPhraseList"/> in order and packs them
        /// into fragments. A fragment is <paramref name="fragCharSize"/> characters wide, or wider
        /// when its opening phrase ends past that width.
        /// </summary>
        /// <param name="fieldPhraseList">Phrase list whose <c>phraseList</c> is enumerated.</param>
        /// <param name="fragCharSize">Fragment width in characters; must be at least <c>MIN_FRAG_CHAR_SIZE</c>.</param>
        /// <returns>A new <see cref="FieldFragList"/> with one entry added per fragment.</returns>
        /// <exception cref="ArgumentException">
        /// Thrown when <paramref name="fragCharSize"/> is smaller than <c>MIN_FRAG_CHAR_SIZE</c>.
        /// </exception>
        public FieldFragList CreateFieldFragList(FieldPhraseList fieldPhraseList, int fragCharSize)
        {
            if (fragCharSize < MIN_FRAG_CHAR_SIZE)
            {
                throw new ArgumentException("fragCharSize(" + fragCharSize + ") is too small. It must be " +
                                            MIN_FRAG_CHAR_SIZE + " or higher.");
            }

            FieldFragList ffl = new FieldFragList(fragCharSize);

            List<WeightedPhraseInfo> wpil = new List<WeightedPhraseInfo>();

            LinkedList<WeightedPhraseInfo>.Enumerator ite = fieldPhraseList.phraseList.GetEnumerator();

            WeightedPhraseInfo phraseInfo = null;
            int startOffset = 0;

            // Set only while phraseInfo holds a phrase that was read from the enumerator but has
            // not been placed in a fragment yet; the outer loop then reuses it instead of advancing.
            bool taken = false;

            while (true)
            {
                if (!taken)
                {
                    if (!ite.MoveNext())
                    {
                        break;
                    }
                    phraseInfo = ite.Current;
                }
                taken = false;
                if (phraseInfo == null)
                {
                    break;
                }

                // A phrase starting before the previous fragment's end is skipped when it also
                // ends before that border; otherwise the border moves back to the phrase start.
                if (phraseInfo.StartOffset < startOffset)
                {
                    if (phraseInfo.EndOffset < startOffset)
                    {
                        continue;
                    }
                    startOffset = phraseInfo.StartOffset;
                }

                wpil.Clear();
                wpil.Add(phraseInfo);

                // Start MARGIN characters before the phrase, but not before the current border.
                int st = phraseInfo.StartOffset - MARGIN < startOffset ?
                         startOffset : phraseInfo.StartOffset - MARGIN;
                int en = st + fragCharSize;
                if (phraseInfo.EndOffset > en)
                {
                    en = phraseInfo.EndOffset;
                }
                startOffset = en;

                // Absorb each following phrase that still ends within this fragment.
                while (ite.MoveNext())
                {
                    phraseInfo = ite.Current;
                    if (phraseInfo == null || phraseInfo.EndOffset > en)
                    {
                        // Not placed here: leave it pending for the outer loop.
                        taken = true;
                        break;
                    }

                    // A placed phrase is consumed. The flag used to stay set here, so when the
                    // list ended on a phrase finishing exactly at 'en' it was emitted a second
                    // time as its own overlapping fragment.
                    wpil.Add(phraseInfo);
                }
                ffl.Add(st, en, wpil);
            }
            return ffl;
        }
 /// <summary>
 /// Creates the term stack and phrase list for the given document field and passes the
 /// phrase list to <c>fragListBuilder</c> to obtain the fragment list.
 /// </summary>
 private FieldFragList GetFieldFragList(FieldQuery fieldQuery, IndexReader reader, int docId,
     String fieldName, int fragCharSize)
 {
     var termStack = new FieldTermStack(reader, docId, fieldName, fieldQuery);
     var phrases = new FieldPhraseList(termStack, fieldQuery, phraseLimit);

     return fragListBuilder.CreateFieldFragList(phrases, fragCharSize);
 }
Пример #36
0
        /// <summary>
        /// After <c>MakeIndexLongMV</c>, a <c>PqF("search", "engines")</c> search must report two
        /// phrases, at (102,116) and (157,171).
        /// </summary>
        public void Test1PhraseLongMV()
        {
            MakeIndexLongMV();

            var query = new FieldQuery(PqF("search", "engines"), true, true);
            var hits = new FieldPhraseList(new FieldTermStack(reader, 0, F, query), query);

            Assert.AreEqual(2, hits.phraseList.Count);

            var head = hits.phraseList.First;
            Assert.AreEqual("searchengines(1.0)((102,116))", head.Value.ToString());
            Assert.AreEqual("searchengines(1.0)((157,171))", head.Next.Value.ToString());
        }
Пример #37
0
        /// <summary>
        /// After <c>MakeIndex2w1w</c>, a <c>Tq("computer")</c> search must report one phrase with
        /// start offset 3 and end offset 20.
        /// </summary>
        public void TestFieldPhraseListIndex2w1wSearch1partial()
        {
            MakeIndex2w1w();

            var query = new FieldQuery(Tq("computer"), true, true);
            var hits = new FieldPhraseList(new FieldTermStack(reader, 0, F, query), query);

            Assert.AreEqual(1, hits.phraseList.Count);

            var info = hits.phraseList.First.Value;
            Assert.AreEqual("computer(1.0)((3,20))", info.ToString());
            Assert.AreEqual(3, info.StartOffset);
            Assert.AreEqual(20, info.EndOffset);
        }
        /// <summary>
        /// After <c>MakeIndexLongMVB</c>, searching for the 2-grams of "speed" and building a
        /// fragment list of size 100 must yield one fragment spanning (82,182).
        /// </summary>
        public void Test1PhraseLongMVB()
        {
            MakeIndexLongMVB();

            // "speed" -(2gram)-> "sp","pe","ee","ed"
            var query = new FieldQuery(PqF("sp", "pe", "ee", "ed"), true, true);
            var hits = new FieldPhraseList(new FieldTermStack(reader, 0, F, query), query);
            var fragList = new SimpleFragListBuilder().CreateFieldFragList(hits, 100);

            Assert.AreEqual(1, fragList.fragInfos.Count);
            Assert.AreEqual("subInfos=(sppeeeed((88,93)))/1.0(82,182)", fragList.fragInfos[0].ToString());
        }
Пример #39
0
        /// <summary>
        /// With "d a b d c a b c" indexed, searching for <c>PqF("a", "b")</c> OR
        /// <c>PqF("a", "b", "c")</c> must report "ab" at (2,5) and then "abc" at (10,15).
        /// </summary>
        public void TestSearchLongestPhrase()
        {
            Make1d1fIndex("d a b d c a b c");

            var either = new BooleanQuery();
            either.Add(PqF("a", "b"), Lucene.Net.Search.BooleanClause.Occur.SHOULD);
            either.Add(PqF("a", "b", "c"), Lucene.Net.Search.BooleanClause.Occur.SHOULD);

            var query = new FieldQuery(either, true, true);
            var hits = new FieldPhraseList(new FieldTermStack(reader, 0, F, query), query);

            Assert.AreEqual(2, hits.phraseList.Count);

            var head = hits.phraseList.First;
            Assert.AreEqual("ab(1.0)((2,5))", head.Value.ToString());
            Assert.AreEqual("abc(1.0)((10,15))", head.Next.Value.ToString());
        }
        /// <summary>
        /// After <c>MakeIndexLongMV</c>, a <c>PqF("search", "engines")</c> search must produce the
        /// exact highlighted fragment asserted below.
        /// </summary>
        public void Test1PhraseLongMV()
        {
            MakeIndexLongMV();

            var query = new FieldQuery(PqF("search", "engines"), true, true);
            var hits = new FieldPhraseList(new FieldTermStack(reader, 0, F, query), query);
            var fragList = new SimpleFragListBuilder().CreateFieldFragList(hits, 100);
            var fragmentsBuilder = new SimpleFragmentsBuilder();

            var fragment = fragmentsBuilder.CreateFragment(reader, 0, F, fragList);

            Assert.AreEqual(" most <b>search engines</b> use only one of these methods. Even the <b>search engines</b> that says they can use t",
                fragment);
        }
        /// <summary>
        /// After <c>MakeIndexLongMV</c>, a <c>PqF("search", "engines")</c> search with a fragment
        /// size of 100 must yield one fragment, spanning (96,196), that holds both phrase hits.
        /// </summary>
        public void Test1PhraseLongMV()
        {
            MakeIndexLongMV();

            var query = new FieldQuery(PqF("search", "engines"), true, true);
            var hits = new FieldPhraseList(new FieldTermStack(reader, 0, F, query), query);
            var fragList = new SimpleFragListBuilder().CreateFieldFragList(hits, 100);

            Assert.AreEqual(1, fragList.fragInfos.Count);
            Assert.AreEqual("subInfos=(searchengines((102,116))searchengines((157,171)))/2.0(96,196)", fragList.fragInfos[0].ToString());
        }
Пример #42
0
        /// <summary>
        /// With "bbbacbabc" indexed through <c>Make1d1fIndexB</c>, a <c>PqF("ba", "ac")</c> search
        /// must report one phrase, <c>baac(1.0)((2,5))</c>.
        /// </summary>
        public void Test1PhraseIndexB()
        {
            // 01 12 23 34 45 56 67 78 (offsets)
            // bb|bb|ba|ac|cb|ba|ab|bc
            //  0  1  2  3  4  5  6  7 (positions)
            Make1d1fIndexB("bbbacbabc");

            var query = new FieldQuery(PqF("ba", "ac"), true, true);
            var hits = new FieldPhraseList(new FieldTermStack(reader, 0, F, query), query);

            Assert.AreEqual(1, hits.phraseList.Count);
            Assert.AreEqual("baac(1.0)((2,5))", hits.phraseList.First.Value.ToString());
        }
        /// <summary>
        /// After <c>MakeIndexLongMVB</c>, searching for the 2-grams of "speed" must produce the
        /// fragment <c>ssing &lt;b&gt;speed&lt;/b&gt;, the</c>.
        /// </summary>
        public void Test1PhraseLongMVB()
        {
            MakeIndexLongMVB();

            // "speed" -(2gram)-> "sp","pe","ee","ed"
            var query = new FieldQuery(PqF("sp", "pe", "ee", "ed"), true, true);
            var hits = new FieldPhraseList(new FieldTermStack(reader, 0, F, query), query);
            var fragList = new SimpleFragListBuilder().CreateFieldFragList(hits, 100);
            var fragmentsBuilder = new SimpleFragmentsBuilder();

            var fragment = fragmentsBuilder.CreateFragment(reader, 0, F, fragList);

            Assert.AreEqual("ssing <b>speed</b>, the", fragment);
        }
Пример #44
0
        /// <summary>
        /// After <c>MakeIndexLongMV</c>, a <c>Preq("engin")</c> search must report two "engines"
        /// phrases, at (109,116) and (164,171).
        /// </summary>
        public void TestCompleteSearchInLongMV()
        {
            MakeIndexLongMV();

            var query = new FieldQuery(Preq("engin"), true, true);
            var hits = new FieldPhraseList(new FieldTermStack(reader, 0, F, query), query);

            Assert.AreEqual(2, hits.phraseList.Count);

            var head = hits.phraseList.First;
            Assert.AreEqual("engines(1.0)((109,116))", head.Value.ToString());
            Assert.AreEqual("engines(1.0)((164,171))", head.Next.Value.ToString());
        }
Пример #45
0
        /// <summary>
        /// After <c>MakeIndexShortMV</c>, a <c>Tq("d")</c> search must report exactly one phrase,
        /// <c>d(1.0)((6,7))</c>.
        /// </summary>
        public void Test1PhraseShortMV()
        {
            MakeIndexShortMV();

            var query = new FieldQuery(Tq("d"), true, true);
            var hits = new FieldPhraseList(new FieldTermStack(reader, 0, F, query), query);

            Assert.AreEqual(1, hits.phraseList.Count);
            Assert.AreEqual("d(1.0)((6,7))", hits.phraseList.First.Value.ToString());
        }
        /// <summary>
        /// Builds a frag list (fragment char size 100) for Tq("d") over the index created by
        /// MakeIndexShortMV and expects one fragment info rendered as
        /// "subInfos=(d((6,7)))/1.0(0,100)".
        /// </summary>
        public void Test1PhraseShortMV()
        {
            const string expectedFragInfo = "subInfos=(d((6,7)))/1.0(0,100)";

            MakeIndexShortMV();

            FieldQuery query = new FieldQuery(Tq("d"), true, true);
            FieldPhraseList phrases = new FieldPhraseList(new FieldTermStack(reader, 0, F, query), query);
            FieldFragList fragList = new SimpleFragListBuilder().CreateFieldFragList(phrases, 100);

            Assert.AreEqual(1, fragList.fragInfos.Count);
            Assert.AreEqual(expectedFragInfo, fragList.fragInfos[0].ToString());
        }
Пример #47
0
        /// <summary>
        /// Searches the index created by MakeIndexLongMVB for PqF("sp", "pe", "ee", "ed") and
        /// expects a single phrase rendered as "sppeeeed(1.0)((88,93))".
        /// </summary>
        public void Test1PhraseLongMVB()
        {
            const string expectedPhrase = "sppeeeed(1.0)((88,93))";

            MakeIndexLongMVB();

            // "speed" -(2gram)-> "sp","pe","ee","ed"
            FieldQuery query = new FieldQuery(PqF("sp", "pe", "ee", "ed"), true, true);
            FieldPhraseList phrases = new FieldPhraseList(new FieldTermStack(reader, 0, F, query), query);

            Assert.AreEqual(1, phrases.phraseList.Count);
            Assert.AreEqual(expectedPhrase, phrases.phraseList.First.Value.ToString());
        }
Пример #48
0
        /// <summary>
        /// Searches the index created by MakeIndex2w1w with a boolean query holding two
        /// Occur.SHOULD clauses, Tq("pc") and PqF("personal", "computer"), and expects a single
        /// phrase whose start offset is 3 and end offset is 20.
        /// </summary>
        public void TestFieldPhraseListIndex2w1wSearch1term1phrase()
        {
            MakeIndex2w1w();

            BooleanQuery bq = new BooleanQuery();
            bq.Add(Tq("pc"), Occur.SHOULD);
            bq.Add(PqF("personal", "computer"), Occur.SHOULD);
            FieldQuery fq = new FieldQuery(bq, true, true);
            FieldTermStack stack = new FieldTermStack(reader, 0, F, fq);
            FieldPhraseList fpl = new FieldPhraseList(stack, fq);
            Assert.AreEqual(1, fpl.phraseList.Count);

            // Only the boost/offset suffix is checked; "> 0" additionally requires that some text
            // precedes it. The search is ordinal so the result does not depend on the culture of
            // the machine running the test.
            Assert.IsTrue(fpl.phraseList.First.Value.ToString().IndexOf("(1.0)((3,20))", System.StringComparison.Ordinal) > 0);
            Assert.AreEqual(3, fpl.phraseList.First.Value.StartOffset);
            Assert.AreEqual(20, fpl.phraseList.First.Value.EndOffset);
        }
        /// <summary>
        /// Runs the full highlighting pipeline for Tq("aaa") over the index created by
        /// MakeUnstoredIndex and expects CreateFragment to return null.
        /// </summary>
        public void TestUnstoredField()
        {
            MakeUnstoredIndex();

            FieldQuery query = new FieldQuery(Tq("aaa"), true, true);
            FieldPhraseList phrases = new FieldPhraseList(new FieldTermStack(reader, 0, F, query), query);
            FieldFragList fragList = new SimpleFragListBuilder().CreateFieldFragList(phrases, 100);

            SimpleFragmentsBuilder fragmentsBuilder = new SimpleFragmentsBuilder();
            string fragment = fragmentsBuilder.CreateFragment(reader, 0, F, fragList);
            Assert.IsNull(fragment);
        }
Пример #50
0
        /// <summary>
        /// Searches the index created by MakeIndex1w2w for PqF("personal", "computer") and
        /// expects a single phrase rendered as "personalcomputer(1.0)((3,5))" with start
        /// offset 3 and end offset 5.
        /// </summary>
        public void TestFieldPhraseListIndex1w2wSearch1phrase()
        {
            const string expectedPhrase = "personalcomputer(1.0)((3,5))";
            const int expectedStart = 3;
            const int expectedEnd = 5;

            MakeIndex1w2w();

            FieldQuery query = new FieldQuery(PqF("personal", "computer"), true, true);
            FieldPhraseList phrases = new FieldPhraseList(new FieldTermStack(reader, 0, F, query), query);

            Assert.AreEqual(1, phrases.phraseList.Count);
            Assert.AreEqual(expectedPhrase, phrases.phraseList.First.Value.ToString());
            Assert.AreEqual(expectedStart, phrases.phraseList.First.Value.StartOffset);
            Assert.AreEqual(expectedEnd, phrases.phraseList.First.Value.EndOffset);
        }
Пример #51
0
        /// <summary>
        /// Builds an index from "abab" via Make1d1fIndexB and checks that Tq("ab") produces two
        /// phrases, rendered as "ab(1.0)((0,2))" and "ab(1.0)((2,4))" in list order.
        /// </summary>
        public void Test2ConcatTermsIndexB()
        {
            // 01 12 23 (offsets)
            // ab|ba|ab (2-char pieces of the indexed text)
            //  0  1  2 (positions)
            const string indexedText = "abab";
            const string expectedFirst = "ab(1.0)((0,2))";
            const string expectedSecond = "ab(1.0)((2,4))";

            Make1d1fIndexB(indexedText);

            FieldQuery query = new FieldQuery(Tq("ab"), true, true);
            FieldPhraseList phrases = new FieldPhraseList(new FieldTermStack(reader, 0, F, query), query);

            Assert.AreEqual(2, phrases.phraseList.Count);
            Assert.AreEqual(expectedFirst, phrases.phraseList.First.Value.ToString());
            Assert.AreEqual(expectedSecond, phrases.phraseList.First.Next.Value.ToString());
        }
        /// <summary>
        /// Runs the full highlighting pipeline for Tq("d") over the index created by
        /// MakeIndexShortMV and expects the fragment "a b c &lt;b&gt;d&lt;/b&gt; e".
        /// </summary>
        public void Test1PhraseShortMV()
        {
            const string expectedFragment = "a b c <b>d</b> e";

            MakeIndexShortMV();

            FieldQuery query = new FieldQuery(Tq("d"), true, true);
            FieldPhraseList phrases = new FieldPhraseList(new FieldTermStack(reader, 0, F, query), query);
            FieldFragList fragList = new SimpleFragListBuilder().CreateFieldFragList(phrases, 100);

            SimpleFragmentsBuilder fragmentsBuilder = new SimpleFragmentsBuilder();
            Assert.AreEqual(expectedFragment, fragmentsBuilder.CreateFragment(reader, 0, F, fragList));
        }