コード例 #1
0
        /// <summary>
        /// Builds at most <paramref name="maxNumFragments"/> highlighted fragment strings for one field of one document.
        /// </summary>
        /// <param name="reader">Index reader handed to <c>GetFields</c> to load the field values.</param>
        /// <param name="docId">Document id handed to <c>GetFields</c>.</param>
        /// <param name="fieldName">Field name handed to <c>GetFields</c>.</param>
        /// <param name="fieldFragList">Supplies the candidate fragment infos through its <c>FragInfos</c> property.</param>
        /// <param name="maxNumFragments">Upper bound on the number of fragments returned; zero is accepted, negative values are rejected.</param>
        /// <param name="preTags">Passed through to <c>MakeFragment</c>.</param>
        /// <param name="postTags">Passed through to <c>MakeFragment</c>.</param>
        /// <param name="encoder">Passed through to <c>MakeFragment</c>.</param>
        /// <returns>One string per produced fragment, or <c>null</c> when <c>GetFields</c> returns no values.</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="reader"/>, <paramref name="fieldFragList"/>, <paramref name="preTags"/>,
        /// <paramref name="postTags"/> or <paramref name="encoder"/> is <c>null</c>.
        /// </exception>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="maxNumFragments"/> is negative.</exception>
        public virtual string[] CreateFragments(IndexReader reader, int docId,
                                                string fieldName, FieldFragList fieldFragList, int maxNumFragments,
                                                string[] preTags, string[] postTags, IEncoder encoder)
        {
            // LUCENENET specific - null guards, checked in the same order as before
            _ = reader ?? throw new ArgumentNullException(nameof(reader));
            _ = fieldFragList ?? throw new ArgumentNullException(nameof(fieldFragList));
            _ = preTags ?? throw new ArgumentNullException(nameof(preTags));
            _ = postTags ?? throw new ArgumentNullException(nameof(postTags));
            _ = encoder ?? throw new ArgumentNullException(nameof(encoder));

            if (maxNumFragments < 0)
            {
                // LUCENENET specific - ArgumentOutOfRangeException replaces IllegalArgumentException (.NET convention)
                throw new ArgumentOutOfRangeException(nameof(maxNumFragments), "maxNumFragments(" + maxNumFragments + ") must be positive number.");
            }

            IList<WeightedFragInfo> candidates = fieldFragList.FragInfos;
            Field[] fieldValues = GetFields(reader, docId, fieldName);
            if (fieldValues.Length == 0)
            {
                return null;
            }

            // Multi-valued fields are optionally split per value before weighting.
            if (discreteMultiValueHighlighting && fieldValues.Length > 1)
            {
                candidates = DiscreteMultiValueHighlighting(candidates, fieldValues);
            }
            candidates = GetWeightedFragInfoList(candidates);

            int fragmentCount = Math.Min(maxNumFragments, candidates.Count);
            JCG.List<string> rendered = new JCG.List<string>(fragmentCount);
            StringBuilder sharedBuffer = new StringBuilder();
            int[] nextValueIndex = { 0 }; // one-element array so MakeFragment calls share the same slot

            for (int i = 0; i < fragmentCount; i++)
            {
                rendered.Add(MakeFragment(sharedBuffer, nextValueIndex, fieldValues, candidates[i], preTags, postTags, encoder));
            }

            return rendered.ToArray();
        }
コード例 #2
0
        /// <summary>
        /// Builds at most <paramref name="maxNumFragments"/> highlighted fragment strings for one field of one document.
        /// </summary>
        /// <param name="reader">Index reader handed to <c>GetFields</c> to load the field values.</param>
        /// <param name="docId">Document id handed to <c>GetFields</c>.</param>
        /// <param name="fieldName">Field name handed to <c>GetFields</c>.</param>
        /// <param name="fieldFragList">Supplies the candidate fragment infos through its <c>FragInfos</c> property.</param>
        /// <param name="maxNumFragments">Upper bound on the number of fragments returned; zero is accepted, negative values are rejected.</param>
        /// <param name="preTags">Passed through to <c>MakeFragment</c>.</param>
        /// <param name="postTags">Passed through to <c>MakeFragment</c>.</param>
        /// <param name="encoder">Passed through to <c>MakeFragment</c>.</param>
        /// <returns>One string per produced fragment, or <c>null</c> when <c>GetFields</c> returns no values.</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="reader"/>, <paramref name="fieldFragList"/>, <paramref name="preTags"/>,
        /// <paramref name="postTags"/> or <paramref name="encoder"/> is <c>null</c>.
        /// </exception>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="maxNumFragments"/> is negative.</exception>
        public virtual string[] CreateFragments(IndexReader reader, int docId,
                                                string fieldName, FieldFragList fieldFragList, int maxNumFragments,
                                                string[] preTags, string[] postTags, IEncoder encoder)
        {
            // Fail fast with a named argument instead of a NullReferenceException deeper in the call chain.
            if (reader is null)
            {
                throw new ArgumentNullException(nameof(reader));
            }
            if (fieldFragList is null)
            {
                throw new ArgumentNullException(nameof(fieldFragList));
            }
            if (preTags is null)
            {
                throw new ArgumentNullException(nameof(preTags));
            }
            if (postTags is null)
            {
                throw new ArgumentNullException(nameof(postTags));
            }
            if (encoder is null)
            {
                throw new ArgumentNullException(nameof(encoder));
            }

            if (maxNumFragments < 0)
            {
                // ArgumentOutOfRangeException derives from ArgumentException, so existing catch blocks keep working,
                // and the offending parameter is now identified.
                throw new ArgumentOutOfRangeException(nameof(maxNumFragments), "maxNumFragments(" + maxNumFragments + ") must be positive number.");
            }

            IList <WeightedFragInfo> fragInfos = fieldFragList.FragInfos;

            Field[] values = GetFields(reader, docId, fieldName);
            if (values.Length == 0)
            {
                return(null);
            }

            // Multi-valued fields are optionally split per value before weighting.
            if (discreteMultiValueHighlighting && values.Length > 1)
            {
                fragInfos = DiscreteMultiValueHighlighting(fragInfos, values);
            }

            fragInfos = GetWeightedFragInfoList(fragInfos);
            int           limitFragments = maxNumFragments < fragInfos.Count ? maxNumFragments : fragInfos.Count;
            List <string> fragments      = new List <string>(limitFragments);

            StringBuilder buffer = new StringBuilder();

            // One-element array so that successive MakeFragment calls share the same slot.
            int[] nextValueIndex = { 0 };
            for (int n = 0; n < limitFragments; n++)
            {
                WeightedFragInfo fragInfo = fragInfos[n];
                fragments.Add(MakeFragment(buffer, nextValueIndex, values, fragInfo, preTags, postTags, encoder));
            }
            return(fragments.ToArray());
        }
コード例 #3
0
        /// <summary>
        /// Redistributes fragment infos over the individual values of a multi-valued field.
        /// A fragment info that lies completely inside one value is kept as is; one that does not is
        /// cut at the value boundaries, and the term offsets falling inside each value are moved into
        /// a new <c>WeightedFragInfo</c> for that value.
        /// </summary>
        /// <remarks>
        /// This method mutates its input: term offsets and emptied sub-infos are removed from the
        /// <c>SubInfos</c> of the entries of <paramref name="fragInfos"/> while they are redistributed.
        /// </remarks>
        /// <param name="fragInfos">Fragment infos whose offsets run across the field values, each value followed by one separator position.</param>
        /// <param name="fields">The values of the field, in the order their offsets were accumulated.</param>
        /// <returns>
        /// All resulting fragment infos in a single list, sorted with
        /// <c>DiscreteMultiValueHighlightingComparerAnonymousHelper</c> (ordering criterion is defined outside this block).
        /// </returns>
        protected virtual IList <WeightedFragInfo> DiscreteMultiValueHighlighting(IList <WeightedFragInfo> fragInfos, Field[] fields)
        {
            // One result bucket per field name (values sharing a name share a bucket).
            IDictionary <string, List <WeightedFragInfo> > fieldNameToFragInfos = new Dictionary <string, List <WeightedFragInfo> >();

            foreach (Field field in fields)
            {
                fieldNameToFragInfos[field.Name] = new List <WeightedFragInfo>();
            }

            foreach (WeightedFragInfo fragInfo in fragInfos)
            {
                // fieldStart/fieldEnd track the offset window of the value currently being examined.
                int fieldStart;
                int fieldEnd = 0;
                foreach (Field field in fields)
                {
                    // An empty value still occupies one position (the separator).
                    if (field.GetStringValue().Length == 0)
                    {
                        fieldEnd++;
                        continue;
                    }
                    fieldStart = fieldEnd;
                    fieldEnd  += field.GetStringValue().Length + 1; // + 1 for going to next field with same name.

                    // Fragment lies entirely inside this value: keep it unchanged and go to the next fragInfo.
                    if (fragInfo.StartOffset >= fieldStart && fragInfo.EndOffset >= fieldStart &&
                        fragInfo.StartOffset <= fieldEnd && fragInfo.EndOffset <= fieldEnd)
                    {
                        fieldNameToFragInfos[field.Name].Add(fragInfo);

                        goto fragInfos_continue;
                    }

                    // Nothing (left) to distribute for this fragInfo: go to the next fragInfo.
                    if (!fragInfo.SubInfos.Any())
                    {
                        goto fragInfos_continue;
                    }

                    // The fragment, or its first remaining term offset, starts at or after the end of this value:
                    // try the next value.
                    Toffs firstToffs = fragInfo.SubInfos[0].TermsOffsets[0];
                    if (fragInfo.StartOffset >= fieldEnd || firstToffs.StartOffset >= fieldEnd)
                    {
                        continue;
                    }

                    // Clamp the fragment boundaries to the current value's window.
                    int fragStart = fieldStart;
                    if (fragInfo.StartOffset > fieldStart && fragInfo.StartOffset < fieldEnd)
                    {
                        fragStart = fragInfo.StartOffset;
                    }

                    int fragEnd = fieldEnd;
                    if (fragInfo.EndOffset > fieldStart && fragInfo.EndOffset < fieldEnd)
                    {
                        fragEnd = fragInfo.EndOffset;
                    }

                    // LUCENENET specific - track the fragInfo.SubInfos items to delete
                    List <SubInfo> fragInfo_SubInfos_ToDelete = new List <SubInfo>();

                    List <SubInfo> subInfos = new List <SubInfo>();
                    float          boost    = 0.0f; //  The boost of the new info will be the sum of the boosts of its SubInfos
                    using (IEnumerator <SubInfo> subInfoIterator = fragInfo.SubInfos.GetEnumerator())
                    {
                        while (subInfoIterator.MoveNext())
                        {
                            SubInfo      subInfo   = subInfoIterator.Current;
                            // Term offsets of this sub-info that fall inside the current value.
                            List <Toffs> toffsList = new List <Toffs>();


                            using (IEnumerator <Toffs> toffsIterator = subInfo.TermsOffsets.GetEnumerator())
                            {
                                while (toffsIterator.MoveNext())
                                {
                                    Toffs toffs = toffsIterator.Current;
                                    if (toffs.StartOffset >= fieldStart && toffs.EndOffset <= fieldEnd)
                                    {
                                        toffsList.Add(toffs);
                                        // The removal that would happen here is deferred until the enumeration is finished (see below).
                                    }
                                }
                            }
                            if (toffsList.Any())
                            {
                                // LUCENENET NOTE: Instead of removing during iteration (which isn't allowed in .NET when using an IEnumerator),
                                // we just remove the items at this point. We only get here if there are items to remove.
                                subInfo.TermsOffsets.RemoveAll(toffsList);

                                subInfos.Add(new SubInfo(subInfo.Text, toffsList, subInfo.Seqnum, subInfo.Boost));
                                boost += subInfo.Boost;
                            }

                            if (!subInfo.TermsOffsets.Any())
                            {
                                // The sub-info has no term offsets left; earmark it for removal after the enumeration.
                                fragInfo_SubInfos_ToDelete.Add(subInfo);
                            }
                        }
                    }

                    // LUCENENET specific - now that we are done iterating the loop, it is safe to delete
                    // the items we earmarked. Note this is just a list of references, so it doesn't consume
                    // much RAM.
                    fragInfo.SubInfos.RemoveAll(fragInfo_SubInfos_ToDelete);


                    // The part of the fragment that belongs to this value becomes its own fragment info.
                    WeightedFragInfo weightedFragInfo = new WeightedFragInfo(fragStart, fragEnd, subInfos, boost);
                    fieldNameToFragInfos[field.Name].Add(weightedFragInfo);
                }
                // Jump target used to continue with the next fragInfo from inside the inner loop.
                fragInfos_continue : { }
            }

            // Flatten all buckets into one list and sort it.
            List <WeightedFragInfo> result = new List <WeightedFragInfo>();

            foreach (List <WeightedFragInfo> weightedFragInfos in fieldNameToFragInfos.Values)
            {
                result.AddRange(weightedFragInfos);
            }
            CollectionUtil.TimSort(result, new DiscreteMultiValueHighlightingComparerAnonymousHelper());

            return(result);
        }
コード例 #4
0
        /// <summary>
        /// Renders a single fragment: the fragment source text is passed through <paramref name="encoder"/>,
        /// and every term offset of <paramref name="fragInfo"/> is wrapped in the pre/post tag selected by
        /// its sub-info's <c>Seqnum</c>.
        /// </summary>
        /// <param name="buffer">Passed through to <c>GetFragmentSourceMSO</c>.</param>
        /// <param name="index">Passed through to <c>GetFragmentSourceMSO</c>.</param>
        /// <param name="values">Field values passed through to <c>GetFragmentSourceMSO</c>.</param>
        /// <param name="fragInfo">Supplies the fragment's start/end offsets and the term offsets to highlight.</param>
        /// <param name="preTags">Candidate opening tags, resolved via <c>GetPreTag</c>.</param>
        /// <param name="postTags">Candidate closing tags, resolved via <c>GetPostTag</c>.</param>
        /// <param name="encoder">Encodes every piece of source text before it is appended.</param>
        /// <returns>The encoded fragment text including highlight tags.</returns>
        protected virtual string MakeFragment(StringBuilder buffer, int[] index, Field[] values, WeightedFragInfo fragInfo,
                                              string[] preTags, string[] postTags, IEncoder encoder)
        {
            int requestedStart = fragInfo.StartOffset;

            // GetFragmentSourceMSO receives the start offset in a one-element array; term offsets
            // below are made relative to the value it holds after the call.
            int[] modifiedStartOffset = { requestedStart };
            string source = GetFragmentSourceMSO(buffer, index, values, requestedStart, fragInfo.EndOffset, modifiedStartOffset);
            int origin = modifiedStartOffset[0];

            StringBuilder output = new StringBuilder();
            int cursor = 0; // position in source up to which text has already been emitted

            foreach (SubInfo subInfo in fragInfo.SubInfos)
            {
                foreach (Toffs termOffsets in subInfo.TermsOffsets)
                {
                    int termStart = termOffsets.StartOffset - origin;

                    // Plain text between the previous term and this one.
                    output.Append(encoder.EncodeText(source.Substring(cursor, termStart - cursor)));
                    output.Append(GetPreTag(preTags, subInfo.Seqnum));

                    // The highlighted term itself.
                    int termEnd = termOffsets.EndOffset - origin;
                    output.Append(encoder.EncodeText(source.Substring(termStart, termEnd - termStart)));
                    output.Append(GetPostTag(postTags, subInfo.Seqnum));

                    cursor = termEnd;
                }
            }

            // Trailing text after the last highlighted term.
            output.Append(encoder.EncodeText(source.Substring(cursor)));
            return output.ToString();
        }
コード例 #5
0
        /// <summary>
        /// Redistributes fragment infos over the individual values of a multi-valued field.
        /// A fragment info that lies completely inside one value is kept as is; one that does not is
        /// cut at the value boundaries, and the term offsets falling inside each value are moved into
        /// a new <c>WeightedFragInfo</c> for that value.
        /// </summary>
        /// <remarks>
        /// This method mutates its input: term offsets and emptied sub-infos are removed from the
        /// <c>SubInfos</c> of the entries of <paramref name="fragInfos"/> while they are redistributed.
        /// </remarks>
        /// <param name="fragInfos">Fragment infos whose offsets run across the field values, each value followed by one separator position.</param>
        /// <param name="fields">The values of the field, in the order their offsets were accumulated.</param>
        /// <returns>All resulting fragment infos in a single list, sorted by the difference of their <c>StartOffset</c> values.</returns>
        protected virtual IList<WeightedFragInfo> DiscreteMultiValueHighlighting(IList<WeightedFragInfo> fragInfos, Field[] fields)
        {
            // One result bucket per field name (values sharing a name share a bucket).
            IDictionary<string, IList<WeightedFragInfo>> buckets = new Dictionary<string, IList<WeightedFragInfo>>();
            foreach (Field f in fields)
            {
                buckets[f.Name] = new JCG.List<WeightedFragInfo>();
            }

            foreach (WeightedFragInfo fragInfo in fragInfos)
            {
                int runningOffset = 0;
                foreach (Field field in fields)
                {
                    int valueLength = field.GetStringValue().Length;
                    if (valueLength == 0)
                    {
                        // An empty value still occupies one position (the separator).
                        runningOffset++;
                        continue;
                    }

                    int valueStart = runningOffset;
                    runningOffset += valueLength + 1; // + 1 for going to next field with same name.
                    int valueEnd = runningOffset;

                    int infoStart = fragInfo.StartOffset;
                    int infoEnd = fragInfo.EndOffset;

                    bool liesInsideValue = valueStart <= infoStart && infoStart <= valueEnd
                                        && valueStart <= infoEnd && infoEnd <= valueEnd;
                    if (liesInsideValue)
                    {
                        // Kept unchanged; nothing more to do for this fragInfo.
                        buckets[field.Name].Add(fragInfo);
                        break;
                    }

                    if (fragInfo.SubInfos.Count == 0)
                    {
                        // Nothing (left) to distribute; move on to the next fragInfo.
                        break;
                    }

                    // The fragment, or its first remaining term offset, starts at or after the end of
                    // this value: try the next value.
                    Toffs firstToffs = fragInfo.SubInfos[0].TermsOffsets[0];
                    if (infoStart >= valueEnd || firstToffs.StartOffset >= valueEnd)
                    {
                        continue;
                    }

                    // Clamp the fragment boundaries to the current value's window.
                    int fragStart = (infoStart > valueStart && infoStart < valueEnd) ? infoStart : valueStart;
                    int fragEnd = (infoEnd > valueStart && infoEnd < valueEnd) ? infoEnd : valueEnd;

                    // LUCENENET NOTE: Lucene removes entries through the iterator while walking the lists, which
                    // .NET enumerators do not allow. The predicate-based IList<T>.RemoveAll() extension of J2N is
                    // used instead: a predicate returns true exactly where Lucene calls iterator.remove(), so no
                    // separate "to delete" collection is needed.
                    IList<SubInfo> movedSubInfos = new JCG.List<SubInfo>();
                    float boostSum = 0.0f; // the new info's boost is the sum of the boosts of its SubInfos
                    fragInfo.SubInfos.RemoveAll(subInfo =>
                    {
                        IList<Toffs> movedOffsets = new JCG.List<Toffs>();
                        subInfo.TermsOffsets.RemoveAll(toffs =>
                        {
                            bool insideValue = toffs.StartOffset >= valueStart && toffs.EndOffset <= valueEnd;
                            if (insideValue)
                            {
                                movedOffsets.Add(toffs);
                            }
                            return insideValue; // true => remove from the original sub-info
                        });

                        if (movedOffsets.Count > 0)
                        {
                            movedSubInfos.Add(new SubInfo(subInfo.Text, movedOffsets, subInfo.Seqnum, subInfo.Boost));
                            boostSum += subInfo.Boost;
                        }

                        // A sub-info with no term offsets left is dropped from the original fragInfo.
                        return subInfo.TermsOffsets.Count == 0;
                    });

                    buckets[field.Name].Add(new WeightedFragInfo(fragStart, fragEnd, movedSubInfos, boostSum));
                }
            }

            // Flatten all buckets into one list and sort it.
            JCG.List<WeightedFragInfo> merged = new JCG.List<WeightedFragInfo>();
            foreach (IList<WeightedFragInfo> bucket in buckets.Values)
            {
                merged.AddRange(bucket);
            }
            CollectionUtil.TimSort(merged, Comparer<WeightedFragInfo>.Create((left, right) => left.StartOffset - right.StartOffset));

            return merged;
        }