/// <summary>
/// Builds the highlighted fragment strings for one field of one document, returning at most
/// <paramref name="maxNumFragments"/> of them.
/// </summary>
/// <param name="reader">Reader passed to <c>GetFields</c> to load the field values; must not be <c>null</c>.</param>
/// <param name="docId">Document id passed to <c>GetFields</c>.</param>
/// <param name="fieldName">Field name passed to <c>GetFields</c>.</param>
/// <param name="fieldFragList">Source of the initial fragment infos (<c>FragInfos</c>); must not be <c>null</c>.</param>
/// <param name="maxNumFragments">Upper bound on the number of fragments produced; must not be negative.</param>
/// <param name="preTags">Forwarded to <c>MakeFragment</c>; must not be <c>null</c>.</param>
/// <param name="postTags">Forwarded to <c>MakeFragment</c>; must not be <c>null</c>.</param>
/// <param name="encoder">Forwarded to <c>MakeFragment</c>; must not be <c>null</c>.</param>
/// <returns>
/// One string per fragment built, or <c>null</c> when <c>GetFields</c> yields no values.
/// </returns>
/// <exception cref="ArgumentNullException">A reference argument listed above as required is <c>null</c>.</exception>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="maxNumFragments"/> is negative.</exception>
public virtual string[] CreateFragments(IndexReader reader, int docId, string fieldName, FieldFragList fieldFragList,
    int maxNumFragments, string[] preTags, string[] postTags, IEncoder encoder)
{
    // LUCENENET specific - added guard clauses to check for null
    if (reader is null)
    {
        throw new ArgumentNullException(nameof(reader));
    }
    if (fieldFragList is null)
    {
        throw new ArgumentNullException(nameof(fieldFragList));
    }
    if (preTags is null)
    {
        throw new ArgumentNullException(nameof(preTags));
    }
    if (postTags is null)
    {
        throw new ArgumentNullException(nameof(postTags));
    }
    if (encoder is null)
    {
        throw new ArgumentNullException(nameof(encoder));
    }
    if (maxNumFragments < 0)
    {
        // LUCENENET specific - changed from IllegalArgumentException to ArgumentOutOfRangeException (.NET convention)
        throw new ArgumentOutOfRangeException(nameof(maxNumFragments),
            $"maxNumFragments({maxNumFragments}) must be positive number.");
    }

    IList<WeightedFragInfo> candidates = fieldFragList.FragInfos;
    Field[] values = GetFields(reader, docId, fieldName);
    if (values.Length == 0)
    {
        return null;
    }

    // Multi-valued field: optionally re-partition the fragments per individual value.
    if (discreteMultiValueHighlighting && values.Length > 1)
    {
        candidates = DiscreteMultiValueHighlighting(candidates, values);
    }
    candidates = GetWeightedFragInfoList(candidates);

    // Never produce more fragments than requested or than are available.
    int fragmentCount = candidates.Count;
    if (maxNumFragments < fragmentCount)
    {
        fragmentCount = maxNumFragments;
    }

    string[] fragments = new string[fragmentCount];
    StringBuilder buffer = new StringBuilder();
    int[] nextValueIndex = { 0 }; // shared across MakeFragment calls, together with buffer
    for (int i = 0; i < fragmentCount; i++)
    {
        fragments[i] = MakeFragment(buffer, nextValueIndex, values, candidates[i], preTags, postTags, encoder);
    }
    return fragments;
}
/// <summary>
/// Builds the highlighted fragment strings for one field of one document, returning at most
/// <paramref name="maxNumFragments"/> of them.
/// </summary>
/// <param name="reader">Reader passed to <c>GetFields</c> to load the field values; must not be <c>null</c>.</param>
/// <param name="docId">Document id passed to <c>GetFields</c>.</param>
/// <param name="fieldName">Field name passed to <c>GetFields</c>.</param>
/// <param name="fieldFragList">Source of the initial fragment infos (<c>FragInfos</c>); must not be <c>null</c>.</param>
/// <param name="maxNumFragments">Upper bound on the number of fragments produced; must not be negative.</param>
/// <param name="preTags">Forwarded to <c>MakeFragment</c>; must not be <c>null</c>.</param>
/// <param name="postTags">Forwarded to <c>MakeFragment</c>; must not be <c>null</c>.</param>
/// <param name="encoder">Forwarded to <c>MakeFragment</c>; must not be <c>null</c>.</param>
/// <returns>
/// One string per fragment built, or <c>null</c> when <c>GetFields</c> yields no values.
/// </returns>
/// <exception cref="ArgumentNullException">A reference argument listed above as required is <c>null</c>.</exception>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="maxNumFragments"/> is negative.</exception>
public virtual string[] CreateFragments(IndexReader reader, int docId, string fieldName, FieldFragList fieldFragList,
    int maxNumFragments, string[] preTags, string[] postTags, IEncoder encoder)
{
    // Validate up front so a bad argument is reported by name instead of surfacing later
    // as a NullReferenceException somewhere inside the helpers.
    if (reader is null)
    {
        throw new ArgumentNullException(nameof(reader));
    }
    if (fieldFragList is null)
    {
        throw new ArgumentNullException(nameof(fieldFragList));
    }
    if (preTags is null)
    {
        throw new ArgumentNullException(nameof(preTags));
    }
    if (postTags is null)
    {
        throw new ArgumentNullException(nameof(postTags));
    }
    if (encoder is null)
    {
        throw new ArgumentNullException(nameof(encoder));
    }
    if (maxNumFragments < 0)
    {
        // ArgumentOutOfRangeException derives from ArgumentException, so callers that catch
        // ArgumentException keep working while the offending parameter is now identified.
        throw new ArgumentOutOfRangeException(nameof(maxNumFragments),
            "maxNumFragments(" + maxNumFragments + ") must be positive number.");
    }

    IList<WeightedFragInfo> fragInfos = fieldFragList.FragInfos;
    Field[] values = GetFields(reader, docId, fieldName);
    if (values.Length == 0)
    {
        return null;
    }

    // Multi-valued field: optionally re-partition the fragments per individual value.
    if (discreteMultiValueHighlighting && values.Length > 1)
    {
        fragInfos = DiscreteMultiValueHighlighting(fragInfos, values);
    }
    fragInfos = GetWeightedFragInfoList(fragInfos);

    // Never produce more fragments than requested or than are available.
    int limitFragments = maxNumFragments < fragInfos.Count ? maxNumFragments : fragInfos.Count;
    List<string> fragments = new List<string>(limitFragments);
    StringBuilder buffer = new StringBuilder();
    int[] nextValueIndex = { 0 }; // shared across MakeFragment calls, together with buffer
    for (int n = 0; n < limitFragments; n++)
    {
        WeightedFragInfo fragInfo = fragInfos[n];
        fragments.Add(MakeFragment(buffer, nextValueIndex, values, fragInfo, preTags, postTags, encoder));
    }
    return fragments.ToArray();
}
/// <summary>
/// Redistributes fragment infos over the individual values of a multi-valued field so that each
/// resulting fragment lies within the offset window of a single value.
/// </summary>
/// <remarks>
/// Each field value occupies a window <c>[fieldStart, fieldEnd]</c> whose size is the length of the
/// value's string plus one. A fragment that already fits a window is kept as is; otherwise it is
/// clipped to the window and receives only the term offsets that fall inside it. This method
/// mutates the incoming fragment infos: term offsets handed to a new fragment are removed from the
/// original <c>SubInfo</c>, and <c>SubInfo</c>s left without term offsets are removed from the
/// original fragment info.
/// </remarks>
/// <param name="fragInfos">Fragment infos to redistribute.</param>
/// <param name="fields">The field values, in the order their offset windows are laid out.</param>
/// <returns>
/// All resulting fragment infos, sorted with <c>DiscreteMultiValueHighlightingComparerAnonymousHelper</c>.
/// </returns>
protected virtual IList<WeightedFragInfo> DiscreteMultiValueHighlighting(IList<WeightedFragInfo> fragInfos, Field[] fields)
{
    IDictionary<string, List<WeightedFragInfo>> fragInfosByFieldName = new Dictionary<string, List<WeightedFragInfo>>();
    foreach (Field field in fields)
    {
        fragInfosByFieldName[field.Name] = new List<WeightedFragInfo>();
    }

    foreach (WeightedFragInfo fragInfo in fragInfos)
    {
        int fieldEnd = 0;
        foreach (Field field in fields)
        {
            int valueLength = field.GetStringValue().Length;
            if (valueLength == 0)
            {
                // An empty value still advances the window by one.
                fieldEnd++;
                continue;
            }

            int fieldStart = fieldEnd;
            fieldEnd += valueLength + 1; // + 1 for going to next field with same name.

            bool fitsInsideField =
                fragInfo.StartOffset >= fieldStart && fragInfo.EndOffset >= fieldStart
                && fragInfo.StartOffset <= fieldEnd && fragInfo.EndOffset <= fieldEnd;
            if (fitsInsideField)
            {
                fragInfosByFieldName[field.Name].Add(fragInfo);
                break; // nothing follows the field loop, so this moves on to the next fragInfo
            }

            if (fragInfo.SubInfos.Count == 0)
            {
                break; // no sub infos left to distribute; move on to the next fragInfo
            }

            Toffs firstToffs = fragInfo.SubInfos[0].TermsOffsets[0];
            if (fragInfo.StartOffset >= fieldEnd || firstToffs.StartOffset >= fieldEnd)
            {
                continue; // the fragment (or its first term) starts beyond this field's window
            }

            // Clip the fragment to this field's window.
            int fragStart = (fragInfo.StartOffset > fieldStart && fragInfo.StartOffset < fieldEnd)
                ? fragInfo.StartOffset
                : fieldStart;
            int fragEnd = (fragInfo.EndOffset > fieldStart && fragInfo.EndOffset < fieldEnd)
                ? fragInfo.EndOffset
                : fieldEnd;

            // LUCENENET specific - .NET does not allow removal while enumerating, so emptied
            // SubInfos are earmarked here and removed after the loop.
            List<SubInfo> emptiedSubInfos = new List<SubInfo>();
            List<SubInfo> subInfos = new List<SubInfo>();
            float boost = 0.0f; // The boost of the new info will be the sum of the boosts of its SubInfos

            foreach (SubInfo subInfo in fragInfo.SubInfos)
            {
                // Collect the term offsets that lie inside this field's window.
                List<Toffs> toffsList = new List<Toffs>();
                foreach (Toffs toffs in subInfo.TermsOffsets)
                {
                    if (toffs.StartOffset >= fieldStart && toffs.EndOffset <= fieldEnd)
                    {
                        toffsList.Add(toffs);
                    }
                }

                if (toffsList.Count > 0)
                {
                    // Enumeration is finished, so the collected offsets can now be taken out of the original.
                    subInfo.TermsOffsets.RemoveAll(toffsList);
                    subInfos.Add(new SubInfo(subInfo.Text, toffsList, subInfo.Seqnum, subInfo.Boost));
                    boost += subInfo.Boost;
                }

                if (subInfo.TermsOffsets.Count == 0)
                {
                    emptiedSubInfos.Add(subInfo);
                }
            }

            // LUCENENET specific - the loop is done, so it is safe to delete the earmarked items.
            fragInfo.SubInfos.RemoveAll(emptiedSubInfos);

            fragInfosByFieldName[field.Name].Add(new WeightedFragInfo(fragStart, fragEnd, subInfos, boost));
        }
    }

    List<WeightedFragInfo> result = new List<WeightedFragInfo>();
    foreach (List<WeightedFragInfo> perFieldFragInfos in fragInfosByFieldName.Values)
    {
        result.AddRange(perFieldFragInfos);
    }
    CollectionUtil.TimSort(result, new DiscreteMultiValueHighlightingComparerAnonymousHelper());
    return result;
}
/// <summary>
/// Renders a single fragment: the fragment's source text with every term offset wrapped in a
/// pre tag and a post tag, and all text passed through <paramref name="encoder"/>.
/// </summary>
/// <param name="buffer">Passed through to <c>GetFragmentSourceMSO</c>.</param>
/// <param name="index">Passed through to <c>GetFragmentSourceMSO</c>.</param>
/// <param name="values">Field values passed through to <c>GetFragmentSourceMSO</c>.</param>
/// <param name="fragInfo">Supplies the fragment's start/end offsets and the term offsets to tag.</param>
/// <param name="preTags">Passed to <c>GetPreTag</c> together with the sub info's <c>Seqnum</c>.</param>
/// <param name="postTags">Passed to <c>GetPostTag</c> together with the sub info's <c>Seqnum</c>.</param>
/// <param name="encoder">Encodes every piece of source text before it is appended.</param>
/// <returns>The assembled fragment text.</returns>
protected virtual string MakeFragment(StringBuilder buffer, int[] index, Field[] values, WeightedFragInfo fragInfo,
    string[] preTags, string[] postTags, IEncoder encoder)
{
    int requestedStart = fragInfo.StartOffset;
    // Single-element array handed to GetFragmentSourceMSO; term offsets below are made
    // relative to whatever value it holds afterwards.
    int[] modifiedStartOffset = { requestedStart };
    string src = GetFragmentSourceMSO(buffer, index, values, requestedStart, fragInfo.EndOffset, modifiedStartOffset);
    int srcBase = modifiedStartOffset[0];

    StringBuilder fragment = new StringBuilder();
    int cursor = 0; // position in src up to which text has been emitted
    foreach (SubInfo subInfo in fragInfo.SubInfos)
    {
        foreach (Toffs termOffsets in subInfo.TermsOffsets)
        {
            int termStart = termOffsets.StartOffset - srcBase;
            int termEnd = termOffsets.EndOffset - srcBase;

            // Text between the previous term and this one, then the tagged term itself.
            fragment.Append(encoder.EncodeText(src.Substring(cursor, termStart - cursor)));
            fragment.Append(GetPreTag(preTags, subInfo.Seqnum));
            fragment.Append(encoder.EncodeText(src.Substring(termStart, termEnd - termStart)));
            fragment.Append(GetPostTag(postTags, subInfo.Seqnum));

            cursor = termEnd;
        }
    }

    // Remaining text after the last term.
    fragment.Append(encoder.EncodeText(src.Substring(cursor)));
    return fragment.ToString();
}
/// <summary>
/// Redistributes fragment infos over the individual values of a multi-valued field so that each
/// resulting fragment lies within the offset window of a single value.
/// </summary>
/// <remarks>
/// Each field value occupies a window <c>[fieldStart, fieldEnd]</c> whose size is the length of the
/// value's string plus one. A fragment that already fits a window is kept as is; otherwise it is
/// clipped to the window and receives only the term offsets that fall inside it. This method
/// mutates the incoming fragment infos: term offsets handed to a new fragment are removed from the
/// original <c>SubInfo</c>, and <c>SubInfo</c>s left without term offsets are removed from the
/// original fragment info.
/// </remarks>
/// <param name="fragInfos">Fragment infos to redistribute.</param>
/// <param name="fields">The field values, in the order their offset windows are laid out.</param>
/// <returns>All resulting fragment infos, sorted by ascending <c>StartOffset</c>.</returns>
protected virtual IList<WeightedFragInfo> DiscreteMultiValueHighlighting(IList<WeightedFragInfo> fragInfos, Field[] fields)
{
    IDictionary<string, IList<WeightedFragInfo>> fragInfosByFieldName = new Dictionary<string, IList<WeightedFragInfo>>();
    foreach (Field field in fields)
    {
        fragInfosByFieldName[field.Name] = new JCG.List<WeightedFragInfo>();
    }

    foreach (WeightedFragInfo fragInfo in fragInfos)
    {
        int fieldEnd = 0;
        foreach (Field field in fields)
        {
            int valueLength = field.GetStringValue().Length;
            if (valueLength == 0)
            {
                // An empty value still advances the window by one.
                fieldEnd++;
                continue;
            }

            int fieldStart = fieldEnd;
            fieldEnd += valueLength + 1; // + 1 for going to next field with same name.

            bool fitsInsideField =
                fragInfo.StartOffset >= fieldStart && fragInfo.EndOffset >= fieldStart
                && fragInfo.StartOffset <= fieldEnd && fragInfo.EndOffset <= fieldEnd;
            if (fitsInsideField)
            {
                fragInfosByFieldName[field.Name].Add(fragInfo);
                break; // nothing follows the field loop, so this moves on to the next fragInfo
            }

            if (fragInfo.SubInfos.Count == 0)
            {
                break; // no sub infos left to distribute; move on to the next fragInfo
            }

            Toffs firstToffs = fragInfo.SubInfos[0].TermsOffsets[0];
            if (fragInfo.StartOffset >= fieldEnd || firstToffs.StartOffset >= fieldEnd)
            {
                continue; // the fragment (or its first term) starts beyond this field's window
            }

            // Clip the fragment to this field's window.
            int fragStart = (fragInfo.StartOffset > fieldStart && fragInfo.StartOffset < fieldEnd)
                ? fragInfo.StartOffset
                : fieldStart;
            int fragEnd = (fragInfo.EndOffset > fieldStart && fragInfo.EndOffset < fieldEnd)
                ? fragInfo.EndOffset
                : fieldEnd;

            // LUCENENET NOTE: .NET does not allow removal while enumerating with an IEnumerator, so the
            // predicate-based IList<T>.RemoveAll() extension method of J2N is used instead. Each predicate
            // returns true exactly where Lucene calls iterator.remove() and false where it does not,
            // which avoids tracking the items to remove in a separate collection.
            IList<SubInfo> subInfos = new JCG.List<SubInfo>();
            float boost = 0.0f; // The boost of the new info will be the sum of the boosts of its SubInfos

            fragInfo.SubInfos.RemoveAll(subInfo =>
            {
                // Move every term offset inside this field's window out of the original SubInfo.
                IList<Toffs> claimedOffsets = new JCG.List<Toffs>();
                subInfo.TermsOffsets.RemoveAll(toffs =>
                {
                    bool insideField = toffs.StartOffset >= fieldStart && toffs.EndOffset <= fieldEnd;
                    if (insideField)
                    {
                        claimedOffsets.Add(toffs);
                    }
                    return insideField; // true = remove
                });

                if (claimedOffsets.Count > 0)
                {
                    subInfos.Add(new SubInfo(subInfo.Text, claimedOffsets, subInfo.Seqnum, subInfo.Boost));
                    boost += subInfo.Boost;
                }

                // Remove the SubInfo once it has no term offsets left.
                return subInfo.TermsOffsets.Count == 0;
            });

            fragInfosByFieldName[field.Name].Add(new WeightedFragInfo(fragStart, fragEnd, subInfos, boost));
        }
    }

    JCG.List<WeightedFragInfo> result = new JCG.List<WeightedFragInfo>();
    foreach (IList<WeightedFragInfo> perFieldFragInfos in fragInfosByFieldName.Values)
    {
        result.AddRange(perFieldFragInfos);
    }
    CollectionUtil.TimSort(result, Comparer<WeightedFragInfo>.Create((left, right) => left.StartOffset - right.StartOffset));
    return result;
}