/// <summary>
/// Produces the highlighted text of a fragment by surrounding every matched term with
/// the pre/post tags selected by its sub-info's sequence number.
/// </summary>
/// <param name="fragInfo">Fragment whose sub-infos supply the term offsets to highlight.</param>
/// <param name="src">Fragment text; term offsets are mapped into it by subtracting <paramref name="s"/>.</param>
/// <param name="s">Offset subtracted from each term offset to obtain an index into <paramref name="src"/>.</param>
/// <returns>The text of <paramref name="src"/> with tags inserted around each term.</returns>
private String MakeFragment(WeightedFragInfo fragInfo, String src, int s)
{
    var output = new StringBuilder();
    var cursor = 0;

    // Flatten every (term offset, sub-info) pair and visit them in ascending start-offset order.
    var orderedTerms = fragInfo.subInfos
        .SelectMany(info => info.termsOffsets, (info, offs) => new { offs, info })
        .OrderBy(pair => pair.offs.startOffset);

    foreach (var pair in orderedTerms)
    {
        var termStart = pair.offs.startOffset - s;

        // Untagged text between the previous term (or the fragment start) and this term.
        output.Append(src.Substring(cursor, termStart - cursor));
        output.Append(GetPreTag(pair.info.seqnum));
        output.Append(src.Substring(termStart, pair.offs.endOffset - pair.offs.startOffset));
        output.Append(GetPostTag(pair.info.seqnum));

        cursor = pair.offs.endOffset - s;
    }

    // Whatever follows the last term is copied verbatim.
    output.Append(src.Substring(cursor));
    return output.ToString();
}
/// <summary>
/// Builds up to <paramref name="maxNumFragments"/> highlighted fragments for one field of a document.
/// </summary>
/// <param name="reader">Index reader passed through to <c>GetFields</c>.</param>
/// <param name="docId">Document id passed through to <c>GetFields</c>.</param>
/// <param name="fieldName">Field name passed through to <c>GetFields</c>.</param>
/// <param name="fieldFragList">Source of the fragment infos to render.</param>
/// <param name="maxNumFragments">Upper bound on the number of fragments returned; must not be negative (zero yields an empty array when the field has values).</param>
/// <param name="fragCharSize">Character budget handed to <c>MakeFragment</c> for each fragment.</param>
/// <param name="state">State object passed through to <c>GetFields</c> and <c>MakeFragment</c>.</param>
/// <returns>The rendered fragments, or <c>null</c> when <c>GetFields</c> returns no values.</returns>
/// <exception cref="ArgumentException"><paramref name="maxNumFragments"/> is negative.</exception>
public virtual string[] CreateFragments(IndexReader reader, int docId, string fieldName, FieldFragList fieldFragList, int maxNumFragments, int fragCharSize, IState state)
{
    if (maxNumFragments < 0)
    {
        throw new ArgumentException("maxNumFragments(" + maxNumFragments + ") must be positive number.");
    }

    List<WeightedFragInfo> fragInfos = GetWeightedFragInfoList(fieldFragList.fragInfos);

    Field[] values = GetFields(reader, docId, fieldName, state);
    if (values.Length == 0)
    {
        return null;
    }

    // Size the result by what can actually be produced. Pre-allocating maxNumFragments slots
    // would reserve a huge array (or fail outright) when a caller passes a very large
    // "no limit" value such as int.MaxValue.
    int fragmentCount = Math.Min(maxNumFragments, fragInfos.Count);
    var fragments = new List<String>(fragmentCount);

    // The buffer and the next-value index are shared across fragments so that field values
    // appended for one fragment are reused by the following ones.
    StringBuilder buffer = new StringBuilder();
    int[] nextValueIndex = { 0 };
    for (int n = 0; n < fragmentCount; n++)
    {
        fragments.Add(MakeFragment(buffer, nextValueIndex, values, fragInfos[n], fragCharSize, state));
    }

    return fragments.ToArray();
}
/// <summary>
/// Produces a highlighted fragment while spending a character budget: each term is emitted
/// with a limited amount of preceding text, and the remaining budget caps the trailing text.
/// </summary>
/// <param name="fragInfo">Fragment whose sub-infos supply the term offsets to highlight.</param>
/// <param name="src">Fragment text; term offsets are mapped into it by subtracting <paramref name="adjustedStart"/>.</param>
/// <param name="adjustedStart">Offset subtracted from each term offset to obtain an index into <paramref name="src"/>.</param>
/// <param name="fragCharSize">Character budget; reduced by every term and by the text emitted before it.</param>
/// <returns>The highlighted fragment text.</returns>
private String MakeFragment(WeightedFragInfo fragInfo, String src, int adjustedStart, int fragCharSize)
{
    var output = new StringBuilder();
    var cursor = 0;

    // Flatten every (term offset, sub-info) pair and visit them in ascending start-offset order.
    var orderedTerms = fragInfo.subInfos
        .SelectMany(info => info.termsOffsets, (info, offs) => new { offs, info })
        .OrderBy(pair => pair.offs.startOffset);

    foreach (var pair in orderedTerms)
    {
        var termStart = pair.offs.startOffset - adjustedStart;
        var termLength = pair.offs.endOffset - pair.offs.startOffset;

        // Leading context is limited by the gap since the previous term and by half of the
        // budget left once the term itself is accounted for; it is never negative.
        var gap = termStart - cursor;
        var halfOfRemaining = (fragCharSize - termLength) / 2;
        var leadLength = Math.Max(0, Math.Min(gap, halfOfRemaining));

        fragCharSize -= termLength + leadLength;

        output.Append(src.Substring(termStart - leadLength, leadLength));
        output.Append(GetPreTag(pair.info.seqnum));
        output.Append(src.Substring(termStart, termLength));
        output.Append(GetPostTag(pair.info.seqnum));

        cursor = pair.offs.endOffset - adjustedStart;
    }

    // Trailing text: what is left of the budget (not below zero), capped at the end of src.
    var tailLength = Math.Min(Math.Max(0, fragCharSize), src.Length - cursor);
    output.Append(src.Substring(cursor, tailLength));
    return output.ToString();
}
/// <summary>
/// Produces the highlighted text of a fragment by surrounding every matched term with
/// the pre/post tags selected by its sub-info's sequence number. Terms are processed in
/// the order in which the sub-infos and their offsets are enumerated.
/// </summary>
/// <param name="fragInfo">Fragment whose sub-infos supply the term offsets to highlight.</param>
/// <param name="src">Fragment text; term offsets are mapped into it by subtracting <paramref name="s"/>.</param>
/// <param name="s">Offset subtracted from each term offset to obtain an index into <paramref name="src"/>.</param>
/// <returns>The text of <paramref name="src"/> with tags inserted around each term.</returns>
private String MakeFragment(WeightedFragInfo fragInfo, String src, int s)
{
    var output = new StringBuilder();
    var cursor = 0;

    foreach (SubInfo info in fragInfo.subInfos)
    {
        foreach (Toffs offs in info.termsOffsets)
        {
            var termStart = offs.startOffset - s;
            var termEnd = offs.endOffset - s;

            // Untagged text since the previous term, then the tagged term itself.
            output.Append(src.Substring(cursor, termStart - cursor));
            output.Append(GetPreTag(info.seqnum));
            output.Append(src.Substring(termStart, termEnd - termStart));
            output.Append(GetPostTag(info.seqnum));

            cursor = termEnd;
        }
    }

    // Whatever follows the last term is copied verbatim.
    output.Append(src.Substring(cursor));
    return output.ToString();
}
/// <summary>
/// Fills the shared buffer with enough field text to cover the fragment, then selects a
/// window of that text around the fragment's matched terms and returns it.
/// </summary>
/// <param name="buffer">Accumulated field text; field values are appended to it as needed.</param>
/// <param name="index">Single-element array holding the index of the next field value to append; advanced in place.</param>
/// <param name="values">Field values that supply the text.</param>
/// <param name="weightedFragInfo">Fragment whose offsets bound the window and whose term offsets must be covered.</param>
/// <param name="startOffset">Receives the start offset reported by <c>TrimEdges</c> for the returned text.</param>
/// <returns>The text returned by <c>TrimEdges</c> for the chosen window.</returns>
private string GetFragmentSource(StringBuilder buffer, int[] index, Field[] values, WeightedFragInfo weightedFragInfo, out int startOffset)
{
    // Append field values until the buffer reaches the fragment's end offset.
    while (buffer.Length < weightedFragInfo.endOffset && index[0] < values.Length)
    {
        var field = values[index[0]];
        var text = field.StringValue;
        buffer.Append(text);

        // Non-empty tokenized values are separated from the next value by a single space.
        if (field.IsTokenized && text.Length > 0 && index[0] + 1 < values.Length)
        {
            buffer.Append(' ');
        }
        index[0]++;
    }

    // Smallest span that covers every matched term of the fragment.
    var endOffset = 0;
    startOffset = buffer.Length - 1;
    foreach (var subInfo in weightedFragInfo.subInfos)
    {
        foreach (var termsOffset in subInfo.termsOffsets)
        {
            if (termsOffset.startOffset < startOffset)
            {
                startOffset = termsOffset.startOffset;
            }
            if (termsOffset.endOffset > endOffset)
            {
                endOffset = termsOffset.endOffset;
            }
        }
    }

    int maxStart = startOffset;
    int minEnd = endOffset;
    var maxLength = weightedFragInfo.endOffset - weightedFragInfo.startOffset;
    var bufferLength = buffer.Length;
    var stopChars = new[] { ',', '.', ';', '!', '?' };

    // Grow the window one character in each direction until it is close to maxLength.
    while (endOffset - startOffset < maxLength - 2) // limit the size of the returned string
    {
        if (endOffset >= bufferLength) // out of range
        {
            endOffset = bufferLength;
            // Spend the rest of the budget on the left side. The previous expression
            // (startOffset - maxLength - endOffset) was never positive, so the window
            // always fell back to offset 0 and ignored maxLength.
            startOffset = Math.Max(0, endOffset - maxLength);
            break;
        }
        if (startOffset == 0) // out of range
        {
            endOffset = Math.Min(bufferLength, startOffset + maxLength);
            break;
        }

        // An out parameter cannot be captured by a lambda, hence the local copy.
        var localStartOffset = startOffset;
        if (localStartOffset + 1 < bufferLength // keep the look-ahead inside the buffer
            && stopChars.Any(c => c == buffer[localStartOffset])
            && buffer[localStartOffset + 1] == ' ')
        {
            startOffset += 2; // remove the char and the white space
            endOffset = Math.Min(bufferLength, startOffset + maxLength);
            break;
        }

        endOffset++;
        startOffset--;
    }

    int retStartOffset;
    var retVal = TrimEdges(buffer, startOffset, endOffset, out retStartOffset, maxStart, minEnd); // cuts part words
    startOffset = retStartOffset;
    return retVal;
}
/// <summary>
/// Wraps each matched term of a fragment in its pre/post tags, copying the text between
/// terms unchanged. Terms are handled in the enumeration order of the sub-infos and their offsets.
/// </summary>
/// <param name="fragInfo">Fragment whose sub-infos supply the term offsets to highlight.</param>
/// <param name="src">Fragment text; term offsets are mapped into it by subtracting <paramref name="s"/>.</param>
/// <param name="s">Offset subtracted from each term offset to obtain an index into <paramref name="src"/>.</param>
/// <returns>The text of <paramref name="src"/> with tags inserted around each term.</returns>
private String MakeFragment(WeightedFragInfo fragInfo, String src, int s)
{
    var result = new StringBuilder();
    var consumed = 0;

    foreach (SubInfo info in fragInfo.subInfos)
    {
        foreach (Toffs offs in info.termsOffsets)
        {
            int termIndex = offs.startOffset - s;

            // Plain text preceding the term.
            result.Append(src.Substring(consumed, termIndex - consumed));

            // The term itself, enclosed in the tags for this sub-info.
            result.Append(GetPreTag(info.seqnum));
            result.Append(src.Substring(termIndex, offs.endOffset - offs.startOffset));
            result.Append(GetPostTag(info.seqnum));

            consumed = offs.endOffset - s;
        }
    }

    // Remainder of the source after the final term.
    result.Append(src.Substring(consumed));
    return result.ToString();
}
/// <summary>
/// Obtains the source text for a fragment via <c>GetFragmentSource</c> and renders it with
/// highlight tags.
/// </summary>
/// <param name="buffer">Buffer passed through to <c>GetFragmentSource</c>.</param>
/// <param name="index">Single-element array passed through to <c>GetFragmentSource</c>.</param>
/// <param name="values">Field values passed through to <c>GetFragmentSource</c>.</param>
/// <param name="fragInfo">Fragment to render.</param>
/// <returns>The highlighted fragment text.</returns>
protected virtual String MakeFragment(StringBuilder buffer, int[] index, Field[] values, WeightedFragInfo fragInfo)
{
    // GetFragmentSource reports the offset at which the returned text starts; term offsets
    // are translated relative to it when tagging.
    int sourceStart;
    string source = GetFragmentSource(buffer, index, values, fragInfo, out sourceStart);

    string highlighted = MakeFragment(fragInfo, source, sourceStart);
    return highlighted;
}
/// <summary>
/// Renders a fragment with highlight tags, using the text between the fragment's start and
/// end offsets as obtained from <c>GetFragmentSource</c>.
/// </summary>
/// <param name="buffer">Buffer passed through to <c>GetFragmentSource</c>.</param>
/// <param name="index">Single-element array passed through to <c>GetFragmentSource</c>.</param>
/// <param name="values">Field values passed through to <c>GetFragmentSource</c>.</param>
/// <param name="fragInfo">Fragment to render; supplies the start and end offsets.</param>
/// <returns>The highlighted fragment text.</returns>
protected virtual String MakeFragment(StringBuilder buffer, int[] index, String[] values, WeightedFragInfo fragInfo)
{
    var fragmentStart = fragInfo.startOffset;
    var fragmentEnd = fragInfo.endOffset;

    var source = GetFragmentSource(buffer, index, values, fragmentStart, fragmentEnd);

    // The fragment start doubles as the base offset for translating term offsets.
    return MakeFragment(fragInfo, source, fragmentStart);
}
/// <summary>
/// Fills the shared buffer with enough field text to cover the fragment, then selects a
/// window of that text around the fragment's matched terms and returns it.
/// </summary>
/// <param name="buffer">Accumulated field text; field values are appended to it as needed.</param>
/// <param name="index">Single-element array holding the index of the next field value to append; advanced in place.</param>
/// <param name="values">Field values that supply the text.</param>
/// <param name="weightedFragInfo">Fragment whose offsets bound the window and whose term offsets must be covered.</param>
/// <param name="startOffset">Receives the start offset reported by <c>TrimEdges</c> for the returned text.</param>
/// <param name="state">State object passed to <c>Field.StringValue</c>.</param>
/// <returns>The text returned by <c>TrimEdges</c> for the chosen window.</returns>
private string GetFragmentSource(StringBuilder buffer, int[] index, Field[] values, WeightedFragInfo weightedFragInfo, out int startOffset, IState state)
{
    // Append field values until the buffer reaches the fragment's end offset.
    while (buffer.Length < weightedFragInfo.endOffset && index[0] < values.Length)
    {
        var field = values[index[0]];
        // Read the value once instead of calling StringValue(state) twice per field.
        var text = field.StringValue(state);
        buffer.Append(text);

        // Non-empty tokenized values are separated from the next value by a single space.
        if (field.IsTokenized && text.Length > 0 && index[0] + 1 < values.Length)
        {
            buffer.Append(' ');
        }
        index[0]++;
    }

    // Smallest span that covers every matched term of the fragment.
    var endOffset = 0;
    startOffset = buffer.Length - 1;
    foreach (var subInfo in weightedFragInfo.subInfos)
    {
        foreach (var termsOffset in subInfo.termsOffsets)
        {
            if (termsOffset.startOffset < startOffset)
            {
                startOffset = termsOffset.startOffset;
            }
            if (termsOffset.endOffset > endOffset)
            {
                endOffset = termsOffset.endOffset;
            }
        }
    }

    int maxStart = startOffset;
    int minEnd = endOffset;
    var maxLength = weightedFragInfo.endOffset - weightedFragInfo.startOffset;
    var bufferLength = buffer.Length;
    var stopChars = new[] { ',', '.', ';', '!', '?' };

    // Grow the window one character in each direction until it is close to maxLength.
    while (endOffset - startOffset < maxLength - 2) // limit the size of the returned string
    {
        if (endOffset >= bufferLength) // out of range
        {
            endOffset = bufferLength;
            // Spend the rest of the budget on the left side. The previous expression
            // (startOffset - maxLength - endOffset) was never positive, so the window
            // always fell back to offset 0 and ignored maxLength.
            startOffset = Math.Max(0, endOffset - maxLength);
            break;
        }
        if (startOffset == 0) // out of range
        {
            endOffset = Math.Min(bufferLength, startOffset + maxLength);
            break;
        }

        // An out parameter cannot be captured by a lambda, hence the local copy.
        var localStartOffset = startOffset;
        if (localStartOffset + 1 < bufferLength // keep the look-ahead inside the buffer
            && stopChars.Any(c => c == buffer[localStartOffset])
            && buffer[localStartOffset + 1] == ' ')
        {
            startOffset += 2; // remove the char and the white space
            endOffset = Math.Min(bufferLength, startOffset + maxLength);
            break;
        }

        endOffset++;
        startOffset--;
    }

    int retStartOffset;
    var retVal = TrimEdges(buffer, startOffset, endOffset, out retStartOffset, maxStart, minEnd); // cuts part words
    startOffset = retStartOffset;
    return retVal;
}
/// <summary>
/// Obtains the source text for a fragment via <c>GetFragmentSource</c> and renders it with
/// highlight tags under the given character budget.
/// </summary>
/// <param name="buffer">Buffer passed through to <c>GetFragmentSource</c>.</param>
/// <param name="index">Single-element array passed through to <c>GetFragmentSource</c>.</param>
/// <param name="values">Field values passed through to <c>GetFragmentSource</c>.</param>
/// <param name="fragInfo">Fragment to render.</param>
/// <param name="fragCharSize">Character budget forwarded to the tagging overload.</param>
/// <param name="state">State object passed through to <c>GetFragmentSource</c>.</param>
/// <returns>The highlighted fragment text.</returns>
protected virtual string MakeFragment(StringBuilder buffer, int[] index, Field[] values, WeightedFragInfo fragInfo, int fragCharSize, IState state)
{
    // GetFragmentSource reports the offset at which the returned text starts; term offsets
    // are translated relative to it when tagging.
    int sourceStart;
    string source = GetFragmentSource(buffer, index, values, fragInfo, out sourceStart, state);

    string highlighted = MakeFragment(fragInfo, source, sourceStart, fragCharSize);
    return highlighted;
}
/// <summary>
/// Fetches the text spanning the fragment's start and end offsets from
/// <c>GetFragmentSource</c> and returns it with highlight tags applied.
/// </summary>
/// <param name="buffer">Buffer passed through to <c>GetFragmentSource</c>.</param>
/// <param name="index">Single-element array passed through to <c>GetFragmentSource</c>.</param>
/// <param name="values">Field values passed through to <c>GetFragmentSource</c>.</param>
/// <param name="fragInfo">Fragment to render; supplies the start and end offsets.</param>
/// <returns>The highlighted fragment text.</returns>
protected virtual String MakeFragment(StringBuilder buffer, int[] index, String[] values, WeightedFragInfo fragInfo)
{
    var baseOffset = fragInfo.startOffset;
    var limitOffset = fragInfo.endOffset;

    var fragmentText = GetFragmentSource(buffer, index, values, baseOffset, limitOffset);

    // Term offsets are translated relative to the fragment's own start offset.
    return MakeFragment(fragInfo, fragmentText, baseOffset);
}
/// <summary>
/// Renders a fragment with highlight tags under a character budget. Every term is written
/// together with a bounded amount of the text that precedes it, and the budget that
/// remains afterwards limits how much trailing text is appended.
/// </summary>
/// <param name="fragInfo">Fragment whose sub-infos supply the term offsets to highlight.</param>
/// <param name="src">Fragment text; term offsets are mapped into it by subtracting <paramref name="adjustedStart"/>.</param>
/// <param name="adjustedStart">Offset subtracted from each term offset to obtain an index into <paramref name="src"/>.</param>
/// <param name="fragCharSize">Character budget; reduced by every term and by the text emitted before it.</param>
/// <returns>The highlighted fragment text.</returns>
private String MakeFragment(WeightedFragInfo fragInfo, String src, int adjustedStart, int fragCharSize)
{
    var result = new StringBuilder();
    var consumed = 0;
    var budget = fragCharSize;

    // All (term offset, sub-info) pairs, ascending by term start offset.
    var matches = fragInfo.subInfos
        .SelectMany(info => info.termsOffsets, (info, offs) => new { offs, info })
        .OrderBy(match => match.offs.startOffset);

    foreach (var match in matches)
    {
        var termIndex = match.offs.startOffset - adjustedStart;
        var termLength = match.offs.endOffset - match.offs.startOffset;

        // Text before the term: no more than the gap since the previous term, no more than
        // half of the budget remaining after the term, and never a negative amount.
        var prefixLength = Math.Min(termIndex - consumed, (budget - termLength) / 2);
        if (prefixLength < 0)
        {
            prefixLength = 0;
        }

        budget -= termLength + prefixLength;

        result.Append(src.Substring(termIndex - prefixLength, prefixLength));
        result.Append(GetPreTag(match.info.seqnum));
        result.Append(src.Substring(termIndex, termLength));
        result.Append(GetPostTag(match.info.seqnum));

        consumed = match.offs.endOffset - adjustedStart;
    }

    // Trailing text: the non-negative remainder of the budget, capped at the end of src.
    var suffixLength = Math.Min(Math.Max(0, budget), src.Length - consumed);
    result.Append(src.Substring(consumed, suffixLength));
    return result.ToString();
}
/// <summary>
/// Wraps each matched term of a fragment in its pre/post tags, visiting the terms in
/// ascending start-offset order and copying the text between them unchanged.
/// </summary>
/// <param name="fragInfo">Fragment whose sub-infos supply the term offsets to highlight.</param>
/// <param name="src">Fragment text; term offsets are mapped into it by subtracting <paramref name="s"/>.</param>
/// <param name="s">Offset subtracted from each term offset to obtain an index into <paramref name="src"/>.</param>
/// <returns>The text of <paramref name="src"/> with tags inserted around each term.</returns>
private String MakeFragment(WeightedFragInfo fragInfo, String src, int s)
{
    var result = new StringBuilder();
    var consumed = 0;

    // All (term offset, sub-info) pairs, ascending by term start offset.
    var matches = fragInfo.subInfos
        .SelectMany(info => info.termsOffsets, (info, offs) => new { offs, info })
        .OrderBy(match => match.offs.startOffset);

    foreach (var match in matches)
    {
        int termIndex = match.offs.startOffset - s;

        // Plain text preceding the term.
        result.Append(src.Substring(consumed, termIndex - consumed));

        // The term itself, enclosed in the tags for its sub-info.
        result.Append(GetPreTag(match.info.seqnum));
        result.Append(src.Substring(termIndex, match.offs.endOffset - match.offs.startOffset));
        result.Append(GetPostTag(match.info.seqnum));

        consumed = match.offs.endOffset - s;
    }

    // Remainder of the source after the final term.
    result.Append(src.Substring(consumed));
    return result.ToString();
}