/// <summary>
/// Groups the phrases of <paramref name="fieldPhraseList"/> into fragments and returns them
/// as a <see cref="FieldFragList"/>.
/// </summary>
/// <remarks>
/// A fragment starts MARGIN characters before its first phrase (but never before the end of
/// the previous fragment) and spans <paramref name="fragCharSize"/> characters, or up to the
/// end of the first phrase when that phrase is longer. Following phrases that end inside the
/// fragment are attached to it; the first phrase that does not fit opens the next fragment.
/// </remarks>
/// <param name="fieldPhraseList">source of the phrases (its <c>phraseList</c> is enumerated in order)</param>
/// <param name="fragCharSize">fragment length in characters; must be at least MIN_FRAG_CHAR_SIZE</param>
/// <returns>the populated fragment list (empty when there are no phrases)</returns>
/// <exception cref="ArgumentException"><paramref name="fragCharSize"/> is smaller than MIN_FRAG_CHAR_SIZE</exception>
public FieldFragList CreateFieldFragList(FieldPhraseList fieldPhraseList, int fragCharSize)
        {
            if (fragCharSize < MIN_FRAG_CHAR_SIZE)
            {
                throw new ArgumentException("fragCharSize(" + fragCharSize + ") is too small. It must be " +
                    MIN_FRAG_CHAR_SIZE + " or higher.");
            }

            FieldFragList ffl = new FieldFragList(fragCharSize);

            List<WeightedPhraseInfo> wpil = new List<WeightedPhraseInfo>();
            LinkedList<WeightedPhraseInfo>.Enumerator ite = fieldPhraseList.phraseList.GetEnumerator();

            WeightedPhraseInfo phraseInfo = null;
            int startOffset = 0;
            // True only while phraseInfo holds an entry that was read from the enumerator but has
            // not been placed in a fragment yet. It must NOT be set for phrases that were already
            // added: otherwise, once the enumerator is exhausted, the last added phrase would be
            // examined again and (when its EndOffset equals the fragment end) emitted a second time.
            bool pending = false;
            while (true)
            {
                if (!pending)
                {
                    if (!ite.MoveNext())
                    {
                        break;
                    }
                    phraseInfo = ite.Current;
                }
                pending = false;
                if (phraseInfo == null)
                {
                    break;
                }

                // if the phrase lies completely before the border of the previous fragment, discard it
                // and try the next phrase; if it only straddles the border, start the new fragment at
                // the phrase itself
                if (phraseInfo.StartOffset < startOffset)
                {
                    if (phraseInfo.EndOffset < startOffset)
                    {
                        continue;
                    }
                    startOffset = phraseInfo.StartOffset;
                }

                wpil.Clear();
                wpil.Add(phraseInfo);

                // leave MARGIN characters in front of the phrase, without overlapping the previous fragment
                int st = phraseInfo.StartOffset - MARGIN < startOffset ?
                    startOffset : phraseInfo.StartOffset - MARGIN;
                int en = st + fragCharSize;
                // a phrase longer than the fragment size stretches the fragment to its own end
                if (phraseInfo.EndOffset > en)
                {
                    en = phraseInfo.EndOffset;
                }
                startOffset = en;

                // attach every following phrase that still ends inside this fragment
                while (ite.MoveNext())
                {
                    phraseInfo = ite.Current;
                    if (phraseInfo == null || phraseInfo.EndOffset > en)
                    {
                        // not consumed: hand it to the outer loop (null terminates the outer loop)
                        pending = true;
                        break;
                    }
                    wpil.Add(phraseInfo);
                }
                ffl.Add(st, en, wpil);
            }
            return ffl;
        }
Beispiel #2
0
        public void TestSmallerFragSizeThanTermQuery()
        {
            // The single matched term is 19 characters long while the fragment size is the allowed minimum.
            SimpleFragListBuilder builder = new SimpleFragListBuilder();
            FieldFragList fragList = builder.CreateFieldFragList(
                fpl("abcdefghijklmnopqrs", "abcdefghijklmnopqrs"),
                SimpleFragListBuilder.MIN_FRAG_CHAR_SIZE);

            // Exactly one fragment is expected, and it covers the whole term.
            Assert.AreEqual(1, fragList.fragInfos.Count);
            Assert.AreEqual(
                "subInfos=(abcdefghijklmnopqrs((0,19)))/1.0(0,19)",
                fragList.fragInfos[0].ToString());
        }
Beispiel #3
0
        public void Test1TermIndex()
        {
            // One term in the query, one matching term in the text, fragment size 100.
            SimpleFragListBuilder builder = new SimpleFragListBuilder();
            FieldFragList fragList = builder.CreateFieldFragList(fpl("a", "a"), 100);

            Assert.AreEqual(1, fragList.fragInfos.Count);
            Assert.AreEqual(
                "subInfos=(a((0,1)))/1.0(0,100)",
                fragList.fragInfos[0].ToString());
        }
Beispiel #4
0
        public void TestPhraseQuerySlop()
        {
            // Sloppy phrase query "a b"~1 run against the text "a c b".
            SimpleFragListBuilder builder = new SimpleFragListBuilder();
            FieldFragList fragList = builder.CreateFieldFragList(fpl("\"a b\"~1", "a c b"), 20);

            // A single fragment holding one phrase made of two term offsets is expected.
            Assert.AreEqual(1, fragList.fragInfos.Count);
            Assert.AreEqual(
                "subInfos=(ab((0,1)(4,5)))/1.0(0,20)",
                fragList.fragInfos[0].ToString());
        }
Beispiel #5
0
        public void TestSmallerFragSizeThanPhraseQuery()
        {
            // The matched phrase spans 21 characters while the fragment size is the allowed minimum.
            SimpleFragListBuilder builder = new SimpleFragListBuilder();
            FieldFragList fragList = builder.CreateFieldFragList(
                fpl("\"abcdefgh jklmnopqrs\"", "abcdefgh   jklmnopqrs"),
                SimpleFragListBuilder.MIN_FRAG_CHAR_SIZE);

            Assert.AreEqual(1, fragList.fragInfos.Count);

            // Echo the fragment description to the console before checking it.
            Console.WriteLine(fragList.fragInfos[0].ToString());
            Assert.AreEqual(
                "subInfos=(abcdefghjklmnopqrs((0,21)))/1.0(0,21)",
                fragList.fragInfos[0].ToString());
        }
Beispiel #6
0
        public void Test1TermIndex()
        {
            FieldFragList fragList = this.ffl("a", "a");

            // Builder created without arguments: the match is wrapped in <b>...</b>.
            SimpleFragmentsBuilder defaultTagBuilder = new SimpleFragmentsBuilder();
            Assert.AreEqual("<b>a</b>", defaultTagBuilder.CreateFragment(reader, 0, F, fragList));

            // Builder created with custom pre/post tags: the match is wrapped in [ ... ].
            String[] preTags = new String[] { "[" };
            String[] postTags = new String[] { "]" };
            SimpleFragmentsBuilder bracketTagBuilder = new SimpleFragmentsBuilder(preTags, postTags);
            Assert.AreEqual("[a]", bracketTagBuilder.CreateFragment(reader, 0, F, fragList));
        }
Beispiel #7
0
        public void Test3Frags()
        {
            FieldFragList fragList = this.ffl("a c", "a b b b b b b b b b b b a b a b b b b b c a a b b");
            SimpleFragmentsBuilder builder = new SimpleFragmentsBuilder();

            // Ask for three fragments and expect them in the order listed below.
            String[] expected =
            {
                "<b>a</b> b b b b b b b b b ",
                "b b <b>a</b> b <b>a</b> b b b b b ",
                "<b>c</b> <b>a</b> <b>a</b> b b"
            };

            String[] actual = builder.CreateFragments(reader, 0, F, fragList, 3);
            Assert.AreEqual(3, actual.Length);
            for (int i = 0; i < expected.Length; i++)
            {
                Assert.AreEqual(expected[i], actual[i]);
            }
        }
Beispiel #8
0
        public void Test2Frags()
        {
            FieldFragList fragList = this.ffl("a", "a b b b b b b b b b b b a b a b");
            SimpleFragmentsBuilder builder = new SimpleFragmentsBuilder();

            // Three snippets are requested, but only two are expected to come back.
            String[] actual = builder.CreateFragments(reader, 0, F, fragList, 3);
            String[] expected =
            {
                "<b>a</b> b b b b b b b b b ",
                "b b <b>a</b> b <b>a</b> b"
            };

            Assert.AreEqual(2, actual.Length);
            for (int i = 0; i < expected.Length; i++)
            {
                Assert.AreEqual(expected[i], actual[i]);
            }
        }
Beispiel #9
0
        public void Test3Frags()
        {
            FieldFragList fragList = this.Ffl("a c", "a b b b b b b b b b b b a b a b b b b b c a a b b");
            ScoreOrderFragmentsBuilder builder = new ScoreOrderFragmentsBuilder();

            String[] actual = builder.CreateFragments(reader, 0, F, fragList, 3, null);
            Assert.AreEqual(3, actual.Length);

            // Fragments are expected in score order, so the one with three hits comes first.
            String[] expectedByScore =
            {
                "<b>c</b> <b>a</b> <b>a</b> b b",
                "b b <b>a</b> b <b>a</b> b b b b b ",
                "<b>a</b> b b b b b b b b b "
            };
            for (int i = 0; i < expectedByScore.Length; i++)
            {
                Assert.AreEqual(expectedByScore[i], actual[i]);
            }
        }
Beispiel #10
0
        public void Test1PhraseLongMVB()
        {
            // Index fixture is prepared by MakeIndexLongMVB().
            MakeIndexLongMVB();

            // "speed" -(2gram)-> "sp","pe","ee","ed"
            FieldQuery query = new FieldQuery(PqF("sp", "pe", "ee", "ed"), true, true);
            FieldTermStack termStack = new FieldTermStack(reader, 0, F, query);
            FieldPhraseList phrases = new FieldPhraseList(termStack, query);

            SimpleFragListBuilder builder = new SimpleFragListBuilder();
            FieldFragList fragList = builder.CreateFieldFragList(phrases, 100);

            Assert.AreEqual(1, fragList.fragInfos.Count);
            Assert.AreEqual(
                "subInfos=(sppeeeed((88,93)))/1.0(82,182)",
                fragList.fragInfos[0].ToString());
        }
Beispiel #11
0
        public void Test1PhraseLongMV()
        {
            // Index fixture is prepared by MakeIndexLongMV().
            MakeIndexLongMV();

            FieldQuery query = new FieldQuery(PqF("search", "engines"), true, true);
            FieldTermStack termStack = new FieldTermStack(reader, 0, F, query);
            FieldPhraseList phrases = new FieldPhraseList(termStack, query);

            SimpleFragListBuilder builder = new SimpleFragListBuilder();
            FieldFragList fragList = builder.CreateFieldFragList(phrases, 100);

            // Both phrase occurrences are expected inside one 100-character fragment.
            Assert.AreEqual(1, fragList.fragInfos.Count);
            Assert.AreEqual(
                "subInfos=(searchengines((102,116))searchengines((157,171)))/2.0(96,196)",
                fragList.fragInfos[0].ToString());
        }
Beispiel #12
0
        public void Test1PhraseShortMV()
        {
            // Index fixture is prepared by MakeIndexShortMV().
            MakeIndexShortMV();

            FieldQuery query = new FieldQuery(Tq("d"), true, true);
            FieldTermStack termStack = new FieldTermStack(reader, 0, F, query);
            FieldPhraseList phrases = new FieldPhraseList(termStack, query);

            SimpleFragListBuilder builder = new SimpleFragListBuilder();
            FieldFragList fragList = builder.CreateFieldFragList(phrases, 100);

            Assert.AreEqual(1, fragList.fragInfos.Count);
            Assert.AreEqual(
                "subInfos=(d((6,7)))/1.0(0,100)",
                fragList.fragInfos[0].ToString());
        }
Beispiel #13
0
        public void Test1PhraseShortMV()
        {
            // Index fixture is prepared by MakeIndexShortMV().
            MakeIndexShortMV();

            // Term query -> term stack -> phrase list -> fragment list.
            FieldQuery query = new FieldQuery(Tq("d"), true, true);
            FieldTermStack termStack = new FieldTermStack(reader, 0, F, query);
            FieldPhraseList phrases = new FieldPhraseList(termStack, query);
            SimpleFragListBuilder fragListBuilder = new SimpleFragListBuilder();
            FieldFragList fragList = fragListBuilder.CreateFieldFragList(phrases, 100);

            // Render the best fragment and check the highlighted text.
            SimpleFragmentsBuilder fragmentsBuilder = new SimpleFragmentsBuilder();
            Assert.AreEqual(
                "a b c <b>d</b> e",
                fragmentsBuilder.CreateFragment(reader, 0, F, fragList));
        }
Beispiel #14
0
        public void TestUnstoredField()
        {
            // Index fixture is prepared by MakeUnstoredIndex().
            MakeUnstoredIndex();

            FieldQuery query = new FieldQuery(Tq("aaa"), true, true);
            FieldTermStack termStack = new FieldTermStack(reader, 0, F, query);
            FieldPhraseList phrases = new FieldPhraseList(termStack, query);
            SimpleFragListBuilder fragListBuilder = new SimpleFragListBuilder();
            FieldFragList fragList = fragListBuilder.CreateFieldFragList(phrases, 100);

            // No fragment is expected for this index: CreateFragment must return null.
            SimpleFragmentsBuilder fragmentsBuilder = new SimpleFragmentsBuilder();
            Assert.IsNull(fragmentsBuilder.CreateFragment(reader, 0, F, fragList));
        }
Beispiel #15
0
        public void Test1PhraseLongMVB()
        {
            // Index fixture is prepared by MakeIndexLongMVB().
            MakeIndexLongMVB();

            // "speed" -(2gram)-> "sp","pe","ee","ed"
            FieldQuery query = new FieldQuery(PqF("sp", "pe", "ee", "ed"), true, true);
            FieldTermStack termStack = new FieldTermStack(reader, 0, F, query);
            FieldPhraseList phrases = new FieldPhraseList(termStack, query);
            SimpleFragListBuilder fragListBuilder = new SimpleFragListBuilder();
            FieldFragList fragList = fragListBuilder.CreateFieldFragList(phrases, 100);

            // Render the best fragment and check the highlighted text.
            SimpleFragmentsBuilder fragmentsBuilder = new SimpleFragmentsBuilder();
            Assert.AreEqual(
                "ssing <b>speed</b>, the",
                fragmentsBuilder.CreateFragment(reader, 0, F, fragList));
        }
Beispiel #16
0
        public void Test1PhraseLongMV()
        {
            // Index fixture is prepared by MakeIndexLongMV().
            MakeIndexLongMV();

            FieldQuery query = new FieldQuery(PqF("search", "engines"), true, true);
            FieldTermStack termStack = new FieldTermStack(reader, 0, F, query);
            FieldPhraseList phrases = new FieldPhraseList(termStack, query);
            SimpleFragListBuilder fragListBuilder = new SimpleFragListBuilder();
            FieldFragList fragList = fragListBuilder.CreateFieldFragList(phrases, 100);

            // Both occurrences of the phrase are expected to be highlighted in the single fragment.
            SimpleFragmentsBuilder fragmentsBuilder = new SimpleFragmentsBuilder();
            Assert.AreEqual(
                " most <b>search engines</b> use only one of these methods. Even the <b>search engines</b> that says they can use t",
                fragmentsBuilder.CreateFragment(reader, 0, F, fragList));
        }
Beispiel #17
0
        public void Test2TermsQuery()
        {
            SimpleFragListBuilder builder = new SimpleFragListBuilder();

            // Neither query term occurs in the text: no fragment at all.
            FieldFragList noMatch = builder.CreateFieldFragList(fpl("a b", "c d e"), 20);
            Assert.AreEqual(0, noMatch.fragInfos.Count);

            // Only "b" occurs in the text.
            FieldFragList oneMatch = builder.CreateFieldFragList(fpl("a b", "d b c"), 20);
            Assert.AreEqual(1, oneMatch.fragInfos.Count);
            Assert.AreEqual(
                "subInfos=(b((2,3)))/1.0(0,20)",
                oneMatch.fragInfos[0].ToString());

            // Both terms occur and share one fragment.
            FieldFragList twoMatches = builder.CreateFieldFragList(fpl("a b", "a b c"), 20);
            Assert.AreEqual(1, twoMatches.fragInfos.Count);
            Assert.AreEqual(
                "subInfos=(a((0,1))b((2,3)))/2.0(0,20)",
                twoMatches.fragInfos[0].ToString());
        }
Beispiel #18
0
        public void Test2TermsIndex1Frag()
        {
            SimpleFragListBuilder builder = new SimpleFragListBuilder();

            // Two adjacent hits, large fragment size.
            FieldFragList adjacent = builder.CreateFieldFragList(fpl("a", "a a"), 100);
            Assert.AreEqual(1, adjacent.fragInfos.Count);
            Assert.AreEqual(
                "subInfos=(a((0,1))a((2,3)))/2.0(0,100)",
                adjacent.fragInfos[0].ToString());

            // Second hit ends at 19, still inside the 20-character fragment starting at 0.
            FieldFragList fromStart = builder.CreateFieldFragList(fpl("a", "a b b b b b b b b a"), 20);
            Assert.AreEqual(1, fromStart.fragInfos.Count);
            Assert.AreEqual(
                "subInfos=(a((0,1))a((18,19)))/2.0(0,20)",
                fromStart.fragInfos[0].ToString());

            // First hit at 8: the fragment is expected to start at 2 and still include the hit at 18.
            FieldFragList shifted = builder.CreateFieldFragList(fpl("a", "b b b b a b b b b a"), 20);
            Assert.AreEqual(1, shifted.fragInfos.Count);
            Assert.AreEqual(
                "subInfos=(a((8,9))a((18,19)))/2.0(2,22)",
                shifted.fragInfos[0].ToString());
        }
        /// <summary>
        /// Builds at most <paramref name="maxNumFragments"/> fragment strings for one field of one document.
        /// </summary>
        /// <param name="reader">index reader handed to GetFields</param>
        /// <param name="docId">document id handed to GetFields</param>
        /// <param name="fieldName">field name handed to GetFields</param>
        /// <param name="fieldFragList">fragment list whose <c>fragInfos</c> are passed through GetWeightedFragInfoList</param>
        /// <param name="maxNumFragments">upper bound on the number of fragments returned; must not be negative</param>
        /// <returns>
        /// the fragments, in the order produced by GetWeightedFragInfoList (possibly fewer than
        /// <paramref name="maxNumFragments"/>), or null when GetFields yields no values
        /// </returns>
        /// <exception cref="ArgumentException"><paramref name="maxNumFragments"/> is negative</exception>
        public virtual String[] CreateFragments(IndexReader reader, int docId, String fieldName, FieldFragList fieldFragList, int maxNumFragments)
        {
            if (maxNumFragments < 0)
            {
                throw new ArgumentException("maxNumFragments(" + maxNumFragments + ") must be positive number.");
            }

            List<WeightedFragInfo> fragInfos = GetWeightedFragInfoList(fieldFragList.fragInfos);

            Field[] values = GetFields(reader, docId, fieldName);
            if (values.Length == 0)
            {
                return null;
            }

            // Size the result by what can actually be produced. Using maxNumFragments directly would
            // reserve a huge backing array (or throw OutOfMemoryException) when a caller passes a very
            // large limit such as int.MaxValue to mean "all fragments".
            int capacity = Math.Min(maxNumFragments, fragInfos.Count);
            List<String> fragments = new List<String>(capacity);

            StringBuilder buffer = new StringBuilder();
            // single-element array so MakeFragment can advance the value index across calls
            int[] nextValueIndex = { 0 };
            for (int n = 0; n < maxNumFragments && n < fragInfos.Count; n++)
            {
                WeightedFragInfo fragInfo = fragInfos[n];
                fragments.Add(MakeFragment(buffer, nextValueIndex, values, fragInfo));
            }
            return fragments.ToArray();
        }
Beispiel #20
0
        public void Test2TermsIndex2Frags()
        {
            SimpleFragListBuilder builder = new SimpleFragListBuilder();

            // Second hit at 28: its fragment is expected to start at 22.
            FieldFragList farApart = builder.CreateFieldFragList(fpl("a", "a b b b b b b b b b b b b b a"), 20);
            Assert.AreEqual(2, farApart.fragInfos.Count);
            Assert.AreEqual(
                "subInfos=(a((0,1)))/1.0(0,20)",
                farApart.fragInfos[0].ToString());
            Assert.AreEqual(
                "subInfos=(a((28,29)))/1.0(22,42)",
                farApart.fragInfos[1].ToString());

            // Second hit at 26: its fragment is expected to start right at the end of the first one.
            FieldFragList touching = builder.CreateFieldFragList(fpl("a", "a b b b b b b b b b b b b a"), 20);
            Assert.AreEqual(2, touching.fragInfos.Count);
            Assert.AreEqual(
                "subInfos=(a((0,1)))/1.0(0,20)",
                touching.fragInfos[0].ToString());
            Assert.AreEqual(
                "subInfos=(a((26,27)))/1.0(20,40)",
                touching.fragInfos[1].ToString());

            // Second hit starts exactly at offset 20, the end of the first fragment.
            FieldFragList onBorder = builder.CreateFieldFragList(fpl("a", "a b b b b b b b b b a"), 20);
            Assert.AreEqual(2, onBorder.fragInfos.Count);
            Assert.AreEqual(
                "subInfos=(a((0,1)))/1.0(0,20)",
                onBorder.fragInfos[0].ToString());
            Assert.AreEqual(
                "subInfos=(a((20,21)))/1.0(20,40)",
                onBorder.fragInfos[1].ToString());
        }
 /// <summary>
 /// Returns the first fragment produced by <c>CreateFragments</c> when asked for a single fragment.
 /// </summary>
 /// <returns>the first fragment, or null when <c>CreateFragments</c> returns null or an empty array</returns>
 public virtual String CreateFragment(IndexReader reader, int docId, String fieldName, FieldFragList fieldFragList)
 {
     String[] candidates = CreateFragments(reader, docId, fieldName, fieldFragList, 1);
     bool hasFragment = candidates != null && candidates.Length != 0;
     return hasFragment ? candidates[0] : null;
 }
Beispiel #22
0
        /// <summary>
        /// Builds at most <paramref name="maxNumFragments"/> fragment strings for one field of one document.
        /// </summary>
        /// <param name="reader">index reader handed to GetFields</param>
        /// <param name="docId">document id handed to GetFields</param>
        /// <param name="fieldName">field name handed to GetFields</param>
        /// <param name="fieldFragList">fragment list whose <c>fragInfos</c> are passed through GetWeightedFragInfoList</param>
        /// <param name="maxNumFragments">upper bound on the number of fragments returned; must not be negative</param>
        /// <param name="fragCharSize">fragment size in characters, forwarded to MakeFragment</param>
        /// <param name="state">state object forwarded unchanged to GetFields and MakeFragment</param>
        /// <returns>
        /// the fragments, in the order produced by GetWeightedFragInfoList (possibly fewer than
        /// <paramref name="maxNumFragments"/>), or null when GetFields yields no values
        /// </returns>
        /// <exception cref="ArgumentException"><paramref name="maxNumFragments"/> is negative</exception>
        public virtual string[] CreateFragments(IndexReader reader, int docId, string fieldName, FieldFragList fieldFragList, int maxNumFragments, int fragCharSize, IState state)
        {
            if (maxNumFragments < 0)
            {
                throw new ArgumentException("maxNumFragments(" + maxNumFragments + ") must be positive number.");
            }

            List<WeightedFragInfo> fragInfos = GetWeightedFragInfoList(fieldFragList.fragInfos);

            Field[] values = GetFields(reader, docId, fieldName, state);
            if (values.Length == 0)
            {
                return null;
            }

            // Size the result by what can actually be produced. Using maxNumFragments directly would
            // reserve a huge backing array (or throw OutOfMemoryException) when a caller passes a very
            // large limit such as int.MaxValue to mean "all fragments".
            int capacity = Math.Min(maxNumFragments, fragInfos.Count);
            List<String> fragments = new List<String>(capacity);

            StringBuilder buffer = new StringBuilder();
            // single-element array so MakeFragment can advance the value index across calls
            int[] nextValueIndex = { 0 };
            for (int n = 0; n < maxNumFragments && n < fragInfos.Count; n++)
            {
                WeightedFragInfo fragInfo = fragInfos[n];
                fragments.Add(MakeFragment(buffer, nextValueIndex, values, fragInfo, fragCharSize, state));
            }
            return fragments.ToArray();
        }
Beispiel #23
0
 /// <summary>
 /// Returns the first fragment produced by <c>CreateFragments</c> when asked for a single fragment.
 /// </summary>
 /// <param name="fragCharSize">fragment size in characters, forwarded to <c>CreateFragments</c></param>
 /// <param name="state">state object forwarded unchanged to <c>CreateFragments</c></param>
 /// <returns>the first fragment, or null when <c>CreateFragments</c> returns null or an empty array</returns>
 public virtual string CreateFragment(IndexReader reader, int docId, string fieldName, FieldFragList fieldFragList, int fragCharSize, IState state)
 {
     String[] candidates = CreateFragments(reader, docId, fieldName, fieldFragList, 1, fragCharSize, state);
     bool hasFragment = candidates != null && candidates.Length != 0;
     return hasFragment ? candidates[0] : null;
 }
Beispiel #24
0
 /// <summary>
 /// Requests a single fragment from <c>CreateFragments</c> and hands back its first element.
 /// </summary>
 /// <returns>the first fragment, or null when <c>CreateFragments</c> returns null or an empty array</returns>
 public virtual String CreateFragment(IndexReader reader, int docId, String fieldName, FieldFragList fieldFragList)
 {
     String[] single = CreateFragments(reader, docId, fieldName, fieldFragList, 1);
     if (single != null && single.Length != 0)
     {
         return single[0];
     }
     return null;
 }
Beispiel #25
0
        /// <summary>
        /// Groups the phrases of <paramref name="fieldPhraseList"/> into fragments and returns them
        /// as a <see cref="FieldFragList"/>.
        /// </summary>
        /// <remarks>
        /// A fragment starts MARGIN characters before its first phrase (but never before the end of
        /// the previous fragment) and spans <paramref name="fragCharSize"/> characters, or up to the
        /// end of the first phrase when that phrase is longer. Following phrases that end inside the
        /// fragment are attached to it; the first phrase that does not fit opens the next fragment.
        /// </remarks>
        /// <param name="fieldPhraseList">source of the phrases (its <c>phraseList</c> is enumerated in order)</param>
        /// <param name="fragCharSize">fragment length in characters; must be at least MIN_FRAG_CHAR_SIZE</param>
        /// <returns>the populated fragment list (empty when there are no phrases)</returns>
        /// <exception cref="ArgumentException"><paramref name="fragCharSize"/> is smaller than MIN_FRAG_CHAR_SIZE</exception>
        public FieldFragList CreateFieldFragList(FieldPhraseList fieldPhraseList, int fragCharSize)
        {
            if (fragCharSize < MIN_FRAG_CHAR_SIZE)
            {
                throw new ArgumentException("fragCharSize(" + fragCharSize + ") is too small. It must be " +
                                            MIN_FRAG_CHAR_SIZE + " or higher.");
            }

            FieldFragList ffl = new FieldFragList(fragCharSize);

            List<WeightedPhraseInfo> wpil = new List<WeightedPhraseInfo>();

            LinkedList<WeightedPhraseInfo>.Enumerator ite = fieldPhraseList.phraseList.GetEnumerator();

            WeightedPhraseInfo phraseInfo = null;
            int startOffset = 0;
            // True only while phraseInfo holds an entry that was read from the enumerator but has
            // not been placed in a fragment yet. It must NOT be set for phrases that were already
            // added: otherwise, once the enumerator is exhausted, the last added phrase would be
            // examined again and (when its EndOffset equals the fragment end) emitted a second time.
            bool pending = false;

            while (true)
            {
                if (!pending)
                {
                    if (!ite.MoveNext())
                    {
                        break;
                    }
                    phraseInfo = ite.Current;
                }
                pending = false;
                if (phraseInfo == null)
                {
                    break;
                }

                // if the phrase lies completely before the border of the previous fragment, discard it
                // and try the next phrase; if it only straddles the border, start the new fragment at
                // the phrase itself
                if (phraseInfo.StartOffset < startOffset)
                {
                    if (phraseInfo.EndOffset < startOffset)
                    {
                        continue;
                    }
                    startOffset = phraseInfo.StartOffset;
                }

                wpil.Clear();
                wpil.Add(phraseInfo);

                // leave MARGIN characters in front of the phrase, without overlapping the previous fragment
                int st = phraseInfo.StartOffset - MARGIN < startOffset ?
                         startOffset : phraseInfo.StartOffset - MARGIN;
                int en = st + fragCharSize;
                // a phrase longer than the fragment size stretches the fragment to its own end
                if (phraseInfo.EndOffset > en)
                {
                    en = phraseInfo.EndOffset;
                }
                startOffset = en;

                // attach every following phrase that still ends inside this fragment
                while (ite.MoveNext())
                {
                    phraseInfo = ite.Current;
                    if (phraseInfo == null || phraseInfo.EndOffset > en)
                    {
                        // not consumed: hand it to the outer loop (null terminates the outer loop)
                        pending = true;
                        break;
                    }
                    wpil.Add(phraseInfo);
                }
                ffl.Add(st, en, wpil);
            }
            return ffl;
        }
        /// <summary>
        /// Returns the best fragments for one field of one document.
        /// </summary>
        /// <param name="fieldQuery">FieldQuery object</param>
        /// <param name="reader">IndexReader of the index</param>
        /// <param name="docId">id of the document to be highlighted</param>
        /// <param name="fieldName">field of the document to be highlighted</param>
        /// <param name="fragCharSize">the length (number of chars) of a fragment</param>
        /// <param name="maxNumFragments">maximum number of fragments</param>
        /// <param name="state">state object forwarded unchanged to GetFieldFragList and to the fragments builder</param>
        /// <returns>
        /// the created fragments, or null when no fragments were created; the array can hold
        /// fewer than <paramref name="maxNumFragments"/> entries
        /// </returns>
        public string[] GetBestFragments(FieldQuery fieldQuery, IndexReader reader, int docId, string fieldName, int fragCharSize, int maxNumFragments, IState state)
        {
            // Step 1: collect the candidate fragments for this field.
            FieldFragList candidates = GetFieldFragList(fieldQuery, reader, docId, fieldName, fragCharSize, state);

            // Step 2: let the configured fragments builder turn them into strings.
            string[] rendered = fragmentsBuilder.CreateFragments(reader, docId, fieldName, candidates, maxNumFragments, fragCharSize, state);
            return rendered;
        }