/// <summary>
/// Redistributes fragment infos over the individual values of a multi-valued field so that
/// every returned fragment lies within the offset range of a single field value.
/// </summary>
/// <remarks>
/// Offsets are laid out as if the values in <paramref name="fields"/> were concatenated:
/// a non-empty value occupies its length plus one, an empty value occupies one.
/// A fragment that already fits inside one value is reused unchanged. A fragment that
/// crosses a value boundary is cut into new fragments, one per value it touches.
/// <para/>
/// NOTE: the input is modified. Term offsets that are moved into a new fragment are removed
/// from the originating <see cref="SubInfo"/>, and sub infos left without term offsets are
/// removed from their fragment.
/// </remarks>
/// <param name="fragInfos">The fragments to redistribute.</param>
/// <param name="fields">The field values, in the order used to compute offsets.</param>
/// <returns>
/// The fragments collected for all field names, sorted with
/// <c>DiscreteMultiValueHighlightingComparerAnonymousHelper</c>.
/// </returns>
protected virtual IList<WeightedFragInfo> DiscreteMultiValueHighlighting(IList<WeightedFragInfo> fragInfos, Field[] fields)
{
    IDictionary<string, List<WeightedFragInfo>> fieldNameToFragInfos = new Dictionary<string, List<WeightedFragInfo>>();
    foreach (Field field in fields)
    {
        fieldNameToFragInfos[field.Name] = new List<WeightedFragInfo>();
    }

    foreach (WeightedFragInfo fragInfo in fragInfos)
    {
        int fieldEnd = 0;
        foreach (Field field in fields)
        {
            // Read the value once; it is needed for both the emptiness test and the length.
            string fieldValue = field.GetStringValue();
            if (fieldValue.Length == 0)
            {
                fieldEnd++;
                continue;
            }

            int fieldStart = fieldEnd;
            fieldEnd += fieldValue.Length + 1; // + 1 for going to next field with same name.

            // The whole fragment fits into this value: keep it as-is and move on to the next fragment.
            if (fragInfo.StartOffset >= fieldStart && fragInfo.EndOffset >= fieldStart &&
                fragInfo.StartOffset <= fieldEnd && fragInfo.EndOffset <= fieldEnd)
            {
                fieldNameToFragInfos[field.Name].Add(fragInfo);
                goto fragInfos_continue;
            }

            // Every sub info has already been consumed by earlier values: nothing left to distribute.
            if (fragInfo.SubInfos.Count == 0)
            {
                goto fragInfos_continue;
            }

            // Neither the fragment nor its first remaining term offset starts before the end of this value.
            Toffs firstToffs = fragInfo.SubInfos[0].TermsOffsets[0];
            if (fragInfo.StartOffset >= fieldEnd || firstToffs.StartOffset >= fieldEnd)
            {
                continue;
            }

            // Clip the fragment boundaries to the current value.
            int fragStart = fieldStart;
            if (fragInfo.StartOffset > fieldStart && fragInfo.StartOffset < fieldEnd)
            {
                fragStart = fragInfo.StartOffset;
            }

            int fragEnd = fieldEnd;
            if (fragInfo.EndOffset > fieldStart && fragInfo.EndOffset < fieldEnd)
            {
                fragEnd = fragInfo.EndOffset;
            }

            // LUCENENET specific - track the fragInfo.SubInfos items to delete
            List<SubInfo> subInfosToDelete = new List<SubInfo>();
            List<SubInfo> subInfos = new List<SubInfo>();
            float boost = 0.0f; // The boost of the new info will be the sum of the boosts of its SubInfos

            foreach (SubInfo subInfo in fragInfo.SubInfos)
            {
                // Collect the term offsets of this sub info that fall inside the current value.
                List<Toffs> toffsList = new List<Toffs>();
                foreach (Toffs toffs in subInfo.TermsOffsets)
                {
                    if (toffs.StartOffset >= fieldStart && toffs.EndOffset <= fieldEnd)
                    {
                        toffsList.Add(toffs);
                    }
                }

                if (toffsList.Count > 0)
                {
                    // LUCENENET NOTE: Instead of removing during iteration (which isn't allowed in .NET
                    // when using an IEnumerator), we just remove the items at this point.
                    // We only get here if there are items to remove.
                    subInfo.TermsOffsets.RemoveAll(toffsList);
                    subInfos.Add(new SubInfo(subInfo.Text, toffsList, subInfo.Seqnum, subInfo.Boost));
                    boost += subInfo.Boost;
                }

                if (subInfo.TermsOffsets.Count == 0)
                {
                    subInfosToDelete.Add(subInfo);
                }
            }

            // LUCENENET specific - now that we are done iterating the loop, it is safe to delete
            // the items we earmarked. Note this is just a list of references, so it doesn't consume
            // much RAM.
            fragInfo.SubInfos.RemoveAll(subInfosToDelete);

            WeightedFragInfo weightedFragInfo = new WeightedFragInfo(fragStart, fragEnd, subInfos, boost);
            fieldNameToFragInfos[field.Name].Add(weightedFragInfo);
        }

    fragInfos_continue: { }
    }

    List<WeightedFragInfo> result = new List<WeightedFragInfo>();
    foreach (List<WeightedFragInfo> weightedFragInfos in fieldNameToFragInfos.Values)
    {
        result.AddRange(weightedFragInfos);
    }

    CollectionUtil.TimSort(result, new DiscreteMultiValueHighlightingComparerAnonymousHelper());
    return result;
}
/// <summary>
/// Redistributes fragment infos over the individual values of a multi-valued field so that
/// every returned fragment lies within the offset range of a single field value.
/// </summary>
/// <remarks>
/// Offsets are laid out as if the values in <paramref name="fields"/> were concatenated:
/// a non-empty value occupies its length plus one, an empty value occupies one.
/// A fragment that already fits inside one value is reused unchanged. A fragment that
/// crosses a value boundary is cut into new fragments, one per value it touches.
/// <para/>
/// NOTE: the input is modified. Term offsets that are moved into a new fragment are removed
/// from the originating <see cref="SubInfo"/>, and sub infos left without term offsets are
/// removed from their fragment.
/// </remarks>
/// <param name="fragInfos">The fragments to redistribute.</param>
/// <param name="fields">The field values, in the order used to compute offsets.</param>
/// <returns>The fragments collected for all field names, sorted by ascending start offset.</returns>
protected virtual IList<WeightedFragInfo> DiscreteMultiValueHighlighting(IList<WeightedFragInfo> fragInfos, Field[] fields)
{
    IDictionary<string, IList<WeightedFragInfo>> fieldNameToFragInfos = new Dictionary<string, IList<WeightedFragInfo>>();
    foreach (Field field in fields)
    {
        fieldNameToFragInfos[field.Name] = new JCG.List<WeightedFragInfo>();
    }

    foreach (WeightedFragInfo fragInfo in fragInfos)
    {
        int fieldEnd = 0;
        foreach (Field field in fields)
        {
            // Read the value once; it is needed for both the emptiness test and the length.
            string fieldValue = field.GetStringValue();
            if (fieldValue.Length == 0)
            {
                fieldEnd++;
                continue;
            }

            int fieldStart = fieldEnd;
            fieldEnd += fieldValue.Length + 1; // + 1 for going to next field with same name.

            // The whole fragment fits into this value: keep it as-is and move on to the next fragment.
            if (fragInfo.StartOffset >= fieldStart && fragInfo.EndOffset >= fieldStart &&
                fragInfo.StartOffset <= fieldEnd && fragInfo.EndOffset <= fieldEnd)
            {
                fieldNameToFragInfos[field.Name].Add(fragInfo);
                goto fragInfos_continue;
            }

            // Every sub info has already been consumed by earlier values: nothing left to distribute.
            if (fragInfo.SubInfos.Count == 0)
            {
                goto fragInfos_continue;
            }

            // Neither the fragment nor its first remaining term offset starts before the end of this value.
            Toffs firstToffs = fragInfo.SubInfos[0].TermsOffsets[0];
            if (fragInfo.StartOffset >= fieldEnd || firstToffs.StartOffset >= fieldEnd)
            {
                continue;
            }

            // Clip the fragment boundaries to the current value.
            int fragStart = fieldStart;
            if (fragInfo.StartOffset > fieldStart && fragInfo.StartOffset < fieldEnd)
            {
                fragStart = fragInfo.StartOffset;
            }

            int fragEnd = fieldEnd;
            if (fragInfo.EndOffset > fieldStart && fragInfo.EndOffset < fieldEnd)
            {
                fragEnd = fragInfo.EndOffset;
            }

            // LUCENENET NOTE: Instead of removing during iteration (which isn't allowed in .NET when using an IEnumerator),
            // we use the IList<T>.RemoveAll() extension method of J2N. This removal happens in a forward way, but since it
            // accepts a predicate, we can put in the rest of Lucene's logic without doing something expensive like keeping
            // track of the items to remove in a separate collection. In a nutshell, any time Lucene calls iterator.remove(),
            // we return true and any time it is skipped, we return false.
            IList<SubInfo> subInfos = new JCG.List<SubInfo>();
            float boost = 0.0f; // The boost of the new info will be the sum of the boosts of its SubInfos

            fragInfo.SubInfos.RemoveAll((subInfo) =>
            {
                // Move the term offsets that fall inside the current value out of this sub info.
                IList<Toffs> toffsList = new JCG.List<Toffs>();
                subInfo.TermsOffsets.RemoveAll((toffs) =>
                {
                    if (toffs.StartOffset >= fieldStart && toffs.EndOffset <= fieldEnd)
                    {
                        toffsList.Add(toffs);
                        return true; // Remove
                    }
                    return false;
                });

                if (toffsList.Count > 0)
                {
                    subInfos.Add(new SubInfo(subInfo.Text, toffsList, subInfo.Seqnum, subInfo.Boost));
                    boost += subInfo.Boost;
                }

                // Remove the sub info from the fragment once it has no term offsets left.
                return subInfo.TermsOffsets.Count == 0;
            });

            WeightedFragInfo weightedFragInfo = new WeightedFragInfo(fragStart, fragEnd, subInfos, boost);
            fieldNameToFragInfos[field.Name].Add(weightedFragInfo);
        }

    fragInfos_continue: { }
    }

    JCG.List<WeightedFragInfo> result = new JCG.List<WeightedFragInfo>();
    foreach (IList<WeightedFragInfo> weightedFragInfos in fieldNameToFragInfos.Values)
    {
        result.AddRange(weightedFragInfos);
    }

    // CompareTo yields the same ordering as subtracting the offsets, without the risk of
    // integer overflow that the subtraction idiom carries.
    CollectionUtil.TimSort(result, Comparer<WeightedFragInfo>.Create(
        (info1, info2) => info1.StartOffset.CompareTo(info2.StartOffset)));
    return result;
}