Пример #1
0
        /// <summary>
        /// Builds a highlighted fragment from <paramref name="src"/> by wrapping every term
        /// occurrence listed in <paramref name="fragInfo"/> with its pre/post tags.
        /// </summary>
        /// <param name="fragInfo">Fragment description whose sub-infos carry the term offsets to highlight.</param>
        /// <param name="src">Text the fragment is cut from.</param>
        /// <param name="s">Value subtracted from every term offset to obtain an index into <paramref name="src"/>.</param>
        /// <returns>The text of <paramref name="src"/> with tags inserted around each term occurrence.</returns>
        private String MakeFragment(WeightedFragInfo fragInfo, String src, int s)
        {
            // Flatten (sub-info, term offset) pairs and process them by ascending start offset.
            var orderedMatches = fragInfo.subInfos
                                 .SelectMany(info => info.termsOffsets, (info, offsets) => new { Offsets = offsets, Info = info })
                                 .OrderBy(match => match.Offsets.startOffset);

            var result = new StringBuilder();
            var cursor = 0; // index in src of the first character not yet copied

            foreach (var match in orderedMatches)
            {
                var matchStart  = match.Offsets.startOffset - s;
                var matchLength = match.Offsets.endOffset - match.Offsets.startOffset;

                // Plain text between the previous match and this one.
                result.Append(src.Substring(cursor, matchStart - cursor));

                // The match itself, surrounded by the tags chosen for this sub-info.
                result.Append(GetPreTag(match.Info.seqnum));
                result.Append(src.Substring(matchStart, matchLength));
                result.Append(GetPostTag(match.Info.seqnum));

                cursor = match.Offsets.endOffset - s;
            }

            // Whatever remains after the last match.
            result.Append(src.Substring(cursor));
            return result.ToString();
        }
Пример #2
0
        /// <summary>
        /// Creates up to <paramref name="maxNumFragments"/> fragments for the given field of a document.
        /// </summary>
        /// <param name="reader">Index reader passed through to <c>GetFields</c>.</param>
        /// <param name="docId">Document id passed through to <c>GetFields</c>.</param>
        /// <param name="fieldName">Field name passed through to <c>GetFields</c>.</param>
        /// <param name="fieldFragList">Source of the fragment infos handed to <c>GetWeightedFragInfoList</c>.</param>
        /// <param name="maxNumFragments">Upper bound on the number of fragments returned; must not be negative.</param>
        /// <param name="fragCharSize">Fragment size forwarded to <c>MakeFragment</c>.</param>
        /// <param name="state">State object forwarded to <c>GetFields</c> and <c>MakeFragment</c>.</param>
        /// <returns>
        /// The created fragments, or <c>null</c> when <c>GetFields</c> returns no values.
        /// </returns>
        /// <exception cref="ArgumentException">Thrown when <paramref name="maxNumFragments"/> is negative.</exception>
        public virtual string[] CreateFragments(IndexReader reader, int docId, string fieldName, FieldFragList fieldFragList, int maxNumFragments, int fragCharSize, IState state)
        {
            // Note: zero passes this check even though the message says "positive".
            if (maxNumFragments < 0)
            {
                throw new ArgumentException("maxNumFragments(" + maxNumFragments + ") must be positive number.");
            }

            List<WeightedFragInfo> candidates = GetWeightedFragInfoList(fieldFragList.fragInfos);
            List<String>           results    = new List<String>(maxNumFragments);

            Field[] fieldValues = GetFields(reader, docId, fieldName, state);
            if (fieldValues.Length == 0)
            {
                return null;
            }

            // The text buffer and the value cursor are shared by all MakeFragment calls below.
            StringBuilder sharedBuffer = new StringBuilder();
            int[]         valueCursor  = new int[] { 0 };

            int position = 0;
            while (position < maxNumFragments && position < candidates.Count)
            {
                WeightedFragInfo candidate = candidates[position];
                results.Add(MakeFragment(sharedBuffer, valueCursor, fieldValues, candidate, fragCharSize, state));
                position++;
            }

            return results.ToArray();
        }
Пример #3
0
        /// <summary>
        /// Builds a highlighted fragment from <paramref name="src"/>, limiting how much plain text
        /// is copied around the matches by a character budget.
        /// </summary>
        /// <param name="fragInfo">Fragment description whose sub-infos carry the term offsets to highlight.</param>
        /// <param name="src">Text the fragment is cut from.</param>
        /// <param name="adjustedStart">Value subtracted from every term offset to obtain an index into <paramref name="src"/>.</param>
        /// <param name="fragCharSize">Character budget; reduced by each match and by the text copied before it.</param>
        /// <returns>The tagged fragment text.</returns>
        private String MakeFragment(WeightedFragInfo fragInfo, String src, int adjustedStart, int fragCharSize)
        {
            // Flatten (sub-info, term offset) pairs and process them by ascending start offset.
            var orderedMatches = fragInfo.subInfos
                                 .SelectMany(info => info.termsOffsets, (info, offsets) => new { Offsets = offsets, Info = info })
                                 .OrderBy(match => match.Offsets.startOffset);

            var result    = new StringBuilder();
            var cursor    = 0;            // index in src just past the previous match
            var remaining = fragCharSize; // characters still allowed in the fragment

            foreach (var match in orderedMatches)
            {
                var matchStart  = match.Offsets.startOffset - adjustedStart;
                var matchLength = match.Offsets.endOffset - match.Offsets.startOffset;

                // Leading text: at most the gap since the previous match, and at most half of
                // what is left of the budget once the match itself is accounted for.
                var leadLength = Math.Min(matchStart - cursor, (remaining - matchLength) / 2);
                if (leadLength < 0)
                {
                    leadLength = 0;
                }

                remaining -= matchLength + leadLength;

                result.Append(src.Substring(matchStart - leadLength, leadLength));
                result.Append(GetPreTag(match.Info.seqnum));
                result.Append(src.Substring(matchStart, matchLength));
                result.Append(GetPostTag(match.Info.seqnum));

                cursor = match.Offsets.endOffset - adjustedStart;
            }

            // Trailing text: bounded by the remaining budget and by the end of src.
            var tailLength = Math.Min(Math.Max(0, remaining), src.Length - cursor);
            result.Append(src.Substring(cursor, tailLength));
            return result.ToString();
        }
Пример #4
0
        /// <summary>
        /// Builds a highlighted fragment from <paramref name="src"/> by wrapping each term occurrence
        /// with its pre/post tags, visiting sub-infos and their offsets in stored order.
        /// </summary>
        /// <param name="fragInfo">Fragment description whose sub-infos carry the term offsets to highlight.</param>
        /// <param name="src">Text the fragment is cut from.</param>
        /// <param name="s">Value subtracted from every term offset to obtain an index into <paramref name="src"/>.</param>
        /// <returns>The tagged fragment text.</returns>
        private String MakeFragment(WeightedFragInfo fragInfo, String src, int s)
        {
            StringBuilder result = new StringBuilder();
            int           cursor = 0; // index in src of the first character not yet copied

            foreach (SubInfo info in fragInfo.subInfos)
            {
                foreach (Toffs offsets in info.termsOffsets)
                {
                    // Translate the term offsets into indexes relative to src.
                    var matchStart = offsets.startOffset - s;
                    var matchEnd   = offsets.endOffset - s;

                    // Untagged text preceding the match.
                    result.Append(src.Substring(cursor, matchStart - cursor));

                    // The match wrapped in the tags selected for this sub-info.
                    result.Append(GetPreTag(info.seqnum));
                    result.Append(src.Substring(matchStart, matchEnd - matchStart));
                    result.Append(GetPostTag(info.seqnum));

                    cursor = matchEnd;
                }
            }

            // Remainder of src after the last match.
            result.Append(src.Substring(cursor));
            return result.ToString();
        }
Пример #5
0
		/// <summary>
		/// Appends field values to <paramref name="buffer"/> as needed and picks the window of the
		/// buffer that is used as the source text of a fragment.
		/// </summary>
		/// <param name="buffer">Accumulated field text; extended in place by this method.</param>
		/// <param name="index">Single-element cursor; <c>index[0]</c> is the next entry of <paramref name="values"/> to append and is advanced here.</param>
		/// <param name="values">Field values whose string content is appended to <paramref name="buffer"/>.</param>
		/// <param name="weightedFragInfo">Fragment description supplying the term offsets and the target window length.</param>
		/// <param name="startOffset">Receives the start offset reported by <c>TrimEdges</c> for the returned text.</param>
		/// <returns>The string produced by <c>TrimEdges</c> for the selected window.</returns>
		private string GetFragmentSource(StringBuilder buffer, int[] index, Field[] values, WeightedFragInfo weightedFragInfo, out int startOffset)
		{
			// Pull in more field values until the buffer reaches the fragment's end offset
			// or there are no values left.
			while (buffer.Length < weightedFragInfo.endOffset && index[0] < values.Length)
			{
				var field = values[index[0]];
				var text = field.StringValue; // read once; used for both the append and the length test
				buffer.Append(text);

				// A non-empty tokenized value that is not the last one is followed by one space.
				if (field.IsTokenized && text.Length > 0 && index[0] + 1 < values.Length)
				{
					buffer.Append(' ');
				}
				index[0]++;
			}

			// Find the smallest start and largest end among all term offsets of the fragment.
			var endOffset = 0;
			startOffset = buffer.Length - 1;

			foreach (var subInfo in weightedFragInfo.subInfos)
			{
				foreach (var termsOffset in subInfo.termsOffsets)
				{
					if (termsOffset.startOffset < startOffset)
					{
						startOffset = termsOffset.startOffset;
					}
					if (termsOffset.endOffset > endOffset)
					{
						endOffset = termsOffset.endOffset;
					}
				}
			}

			// Remember the span covered by the terms themselves; it is handed to TrimEdges below.
			int maxStart = startOffset;
			int minEnd = endOffset;

			var maxLength = weightedFragInfo.endOffset - weightedFragInfo.startOffset;
			var bufferLength = buffer.Length;
			var stopChars = new[] {',', '.', ';','!','?'};

			// Widen the window by one character on each side per iteration.
			while (endOffset - startOffset < maxLength - 2) // limit the size of the returned string
			{
				if (endOffset >= bufferLength) // out of range
				{
					endOffset = bufferLength;

					// Anchor the window at the end of the buffer and extend it backwards to maxLength.
					// The previous expression (startOffset - maxLength - endOffset) could never be
					// positive, so the start always collapsed to 0 regardless of maxLength.
					startOffset = Math.Max(0, endOffset - maxLength);
					break;
				}
				if (startOffset == 0) // out of range
				{
					endOffset = Math.Min(bufferLength, startOffset + maxLength);
					break;
				}

				// An out parameter cannot be captured by a lambda, hence the local copy.
				var localStartOffset = startOffset;
				if (stopChars.Any(c => c == buffer[localStartOffset]) && buffer[startOffset + 1] == ' ')
				{
					startOffset += 2; //remove the char and the white space

					endOffset = Math.Min(bufferLength, startOffset + maxLength);
					break;
				}

				endOffset++;
				startOffset--;
			}

			int retStartOffset;
			var retVal = TrimEdges(buffer, startOffset, endOffset, out retStartOffset, maxStart, minEnd); // cuts part words
			startOffset = retStartOffset;
			return retVal;
		}
Пример #6
0
	    /// <summary>
	    /// Builds a highlighted fragment from <paramref name="src"/> by wrapping each term occurrence
	    /// with its pre/post tags, visiting sub-infos and their offsets in stored order.
	    /// </summary>
	    /// <param name="fragInfo">Fragment description whose sub-infos carry the term offsets to highlight.</param>
	    /// <param name="src">Text the fragment is cut from.</param>
	    /// <param name="s">Value subtracted from every term offset to obtain an index into <paramref name="src"/>.</param>
	    /// <returns>The tagged fragment text.</returns>
	    private String MakeFragment(WeightedFragInfo fragInfo, String src, int s)
	    {
		    var result = new StringBuilder();
		    var cursor = 0; // index in src of the first character not yet copied

		    foreach (SubInfo info in fragInfo.subInfos)
		    {
			    foreach (Toffs offsets in info.termsOffsets)
			    {
				    var matchStart  = offsets.startOffset - s;
				    var matchLength = offsets.endOffset - offsets.startOffset;

				    // Untagged text preceding the match.
				    result.Append(src.Substring(cursor, matchStart - cursor));

				    // The match wrapped in the tags selected for this sub-info.
				    result.Append(GetPreTag(info.seqnum));
				    result.Append(src.Substring(matchStart, matchLength));
				    result.Append(GetPostTag(info.seqnum));

				    cursor = offsets.endOffset - s;
			    }
		    }

		    // Remainder of src after the last match.
		    result.Append(src.Substring(cursor));
		    return result.ToString();
	    }
Пример #7
0
        /// <summary>
        /// Obtains the source text for <paramref name="fragInfo"/> via <c>GetFragmentSource</c> and
        /// turns it into a tagged fragment.
        /// </summary>
        /// <param name="buffer">Text buffer forwarded to <c>GetFragmentSource</c>.</param>
        /// <param name="index">Value cursor forwarded to <c>GetFragmentSource</c>.</param>
        /// <param name="values">Field values forwarded to <c>GetFragmentSource</c>.</param>
        /// <param name="fragInfo">Fragment description to render.</param>
        /// <returns>The tagged fragment text.</returns>
        protected virtual String MakeFragment(StringBuilder buffer, int[] index, Field[] values, WeightedFragInfo fragInfo)
        {
            // GetFragmentSource reports where the returned text starts; offsets are rebased on it.
            int sourceStart;
            string source = GetFragmentSource(buffer, index, values, fragInfo, out sourceStart);

            return MakeFragment(fragInfo, source, sourceStart);
        }
Пример #8
0
 /// <summary>
 /// Cuts the text between the fragment's start and end offsets via <c>GetFragmentSource</c>
 /// and turns it into a tagged fragment.
 /// </summary>
 /// <param name="buffer">Text buffer forwarded to <c>GetFragmentSource</c>.</param>
 /// <param name="index">Value cursor forwarded to <c>GetFragmentSource</c>.</param>
 /// <param name="values">Field values forwarded to <c>GetFragmentSource</c>.</param>
 /// <param name="fragInfo">Fragment description to render.</param>
 /// <returns>The tagged fragment text.</returns>
 protected virtual String MakeFragment(StringBuilder buffer, int[] index, String[] values, WeightedFragInfo fragInfo)
 {
     int fragmentStart = fragInfo.startOffset;
     var fragmentEnd   = fragInfo.endOffset;

     var source = GetFragmentSource(buffer, index, values, fragmentStart, fragmentEnd);

     // The fragment start is also the base the term offsets are rebased on.
     return MakeFragment(fragInfo, source, fragmentStart);
 }
Пример #9
0
        /// <summary>
        /// Appends field values to <paramref name="buffer"/> as needed and picks the window of the
        /// buffer that is used as the source text of a fragment.
        /// </summary>
        /// <param name="buffer">Accumulated field text; extended in place by this method.</param>
        /// <param name="index">Single-element cursor; <c>index[0]</c> is the next entry of <paramref name="values"/> to append and is advanced here.</param>
        /// <param name="values">Field values whose string content is appended to <paramref name="buffer"/>.</param>
        /// <param name="weightedFragInfo">Fragment description supplying the term offsets and the target window length.</param>
        /// <param name="startOffset">Receives the start offset reported by <c>TrimEdges</c> for the returned text.</param>
        /// <param name="state">State object passed to <c>Field.StringValue</c>.</param>
        /// <returns>The string produced by <c>TrimEdges</c> for the selected window.</returns>
        private string GetFragmentSource(StringBuilder buffer, int[] index, Field[] values, WeightedFragInfo weightedFragInfo, out int startOffset, IState state)
        {
            // Pull in more field values until the buffer reaches the fragment's end offset
            // or there are no values left.
            while (buffer.Length < weightedFragInfo.endOffset && index[0] < values.Length)
            {
                var field = values[index[0]];
                var text  = field.StringValue(state); // evaluated once; used for both the append and the length test
                buffer.Append(text);

                // A non-empty tokenized value that is not the last one is followed by one space.
                if (field.IsTokenized && text.Length > 0 && index[0] + 1 < values.Length)
                {
                    buffer.Append(' ');
                }
                index[0]++;
            }

            // Find the smallest start and largest end among all term offsets of the fragment.
            var endOffset = 0;

            startOffset = buffer.Length - 1;

            foreach (var subInfo in weightedFragInfo.subInfos)
            {
                foreach (var termsOffset in subInfo.termsOffsets)
                {
                    if (termsOffset.startOffset < startOffset)
                    {
                        startOffset = termsOffset.startOffset;
                    }
                    if (termsOffset.endOffset > endOffset)
                    {
                        endOffset = termsOffset.endOffset;
                    }
                }
            }

            // Remember the span covered by the terms themselves; it is handed to TrimEdges below.
            int maxStart = startOffset;
            int minEnd   = endOffset;

            var maxLength    = weightedFragInfo.endOffset - weightedFragInfo.startOffset;
            var bufferLength = buffer.Length;
            var stopChars    = new[] { ',', '.', ';', '!', '?' };

            // Widen the window by one character on each side per iteration.
            while (endOffset - startOffset < maxLength - 2) // limit the size of the returned string
            {
                if (endOffset >= bufferLength)              // out of range
                {
                    endOffset = bufferLength;

                    // Anchor the window at the end of the buffer and extend it backwards to maxLength.
                    // The previous expression (startOffset - maxLength - endOffset) could never be
                    // positive, so the start always collapsed to 0 regardless of maxLength.
                    startOffset = Math.Max(0, endOffset - maxLength);
                    break;
                }
                if (startOffset == 0) // out of range
                {
                    endOffset = Math.Min(bufferLength, startOffset + maxLength);
                    break;
                }

                // An out parameter cannot be captured by a lambda, hence the local copy.
                var localStartOffset = startOffset;
                if (stopChars.Any(c => c == buffer[localStartOffset]) && buffer[startOffset + 1] == ' ')
                {
                    startOffset += 2; //remove the char and the white space

                    endOffset = Math.Min(bufferLength, startOffset + maxLength);
                    break;
                }

                endOffset++;
                startOffset--;
            }

            int retStartOffset;
            var retVal = TrimEdges(buffer, startOffset, endOffset, out retStartOffset, maxStart, minEnd); // cuts part words

            startOffset = retStartOffset;
            return(retVal);
        }
Пример #10
0
        /// <summary>
        /// Obtains the source text for <paramref name="fragInfo"/> via <c>GetFragmentSource</c> and
        /// turns it into a tagged fragment limited by <paramref name="fragCharSize"/>.
        /// </summary>
        /// <param name="buffer">Text buffer forwarded to <c>GetFragmentSource</c>.</param>
        /// <param name="index">Value cursor forwarded to <c>GetFragmentSource</c>.</param>
        /// <param name="values">Field values forwarded to <c>GetFragmentSource</c>.</param>
        /// <param name="fragInfo">Fragment description to render.</param>
        /// <param name="fragCharSize">Character budget forwarded to the fragment builder.</param>
        /// <param name="state">State object forwarded to <c>GetFragmentSource</c>.</param>
        /// <returns>The tagged fragment text.</returns>
        protected virtual string MakeFragment(StringBuilder buffer, int[] index, Field[] values, WeightedFragInfo fragInfo, int fragCharSize, IState state)
        {
            // GetFragmentSource reports where the returned text starts; offsets are rebased on it.
            int sourceStart;
            string source = GetFragmentSource(buffer, index, values, fragInfo, out sourceStart, state);

            return MakeFragment(fragInfo, source, sourceStart, fragCharSize);
        }
Пример #11
0
        /// <summary>
        /// Cuts the text between the fragment's start and end offsets via <c>GetFragmentSource</c>
        /// and turns it into a tagged fragment.
        /// </summary>
        /// <param name="buffer">Text buffer forwarded to <c>GetFragmentSource</c>.</param>
        /// <param name="index">Value cursor forwarded to <c>GetFragmentSource</c>.</param>
        /// <param name="values">Field values forwarded to <c>GetFragmentSource</c>.</param>
        /// <param name="fragInfo">Fragment description to render.</param>
        /// <returns>The tagged fragment text.</returns>
        protected virtual String MakeFragment(StringBuilder buffer, int[] index, String[] values, WeightedFragInfo fragInfo)
        {
            int fragmentStart = fragInfo.startOffset;
            var fragmentEnd   = fragInfo.endOffset;

            var source = GetFragmentSource(buffer, index, values, fragmentStart, fragmentEnd);

            // The fragment start is also the base the term offsets are rebased on.
            return MakeFragment(fragInfo, source, fragmentStart);
        }
Пример #12
0
        /// <summary>
        /// Builds a highlighted fragment from <paramref name="src"/>, limiting how much plain text
        /// is copied around the matches by a character budget.
        /// </summary>
        /// <param name="fragInfo">Fragment description whose sub-infos carry the term offsets to highlight.</param>
        /// <param name="src">Text the fragment is cut from.</param>
        /// <param name="adjustedStart">Value subtracted from every term offset to obtain an index into <paramref name="src"/>.</param>
        /// <param name="fragCharSize">Character budget; reduced by each match and by the text copied before it.</param>
        /// <returns>The tagged fragment text.</returns>
        private String MakeFragment(WeightedFragInfo fragInfo, String src, int adjustedStart, int fragCharSize)
        {
            // Pair every term offset with its owning sub-info, ordered by ascending start offset.
            var hits = fragInfo.subInfos
                .SelectMany(owner => owner.termsOffsets.Select(term => new { Term = term, Owner = owner }))
                .OrderBy(hit => hit.Term.startOffset);

            var output = new StringBuilder();
            var consumed = 0;          // index in src just past the previous hit
            var budget = fragCharSize; // characters still allowed in the fragment

            foreach (var hit in hits)
            {
                var hitStart = hit.Term.startOffset - adjustedStart;
                var hitLength = hit.Term.endOffset - hit.Term.startOffset;

                // Text before the hit: no more than the gap since the previous hit and no more
                // than half of the budget left after the hit itself; never negative.
                var gap = hitStart - consumed;
                var halfBudget = (budget - hitLength) / 2;
                var prefixLength = Math.Max(0, gap < halfBudget ? gap : halfBudget);

                budget -= hitLength + prefixLength;

                output.Append(src.Substring(hitStart - prefixLength, prefixLength));
                output.Append(GetPreTag(hit.Owner.seqnum));
                output.Append(src.Substring(hitStart, hitLength));
                output.Append(GetPostTag(hit.Owner.seqnum));

                consumed = hit.Term.endOffset - adjustedStart;
            }

            // Text after the last hit: bounded by the remaining budget and by the end of src.
            var suffixLength = Math.Min(Math.Max(0, budget), src.Length - consumed);
            output.Append(src.Substring(consumed, suffixLength));
            return output.ToString();
        }
Пример #13
0
	    /// <summary>
	    /// Builds a highlighted fragment from <paramref name="src"/> by wrapping every term
	    /// occurrence listed in <paramref name="fragInfo"/> with its pre/post tags.
	    /// </summary>
	    /// <param name="fragInfo">Fragment description whose sub-infos carry the term offsets to highlight.</param>
	    /// <param name="src">Text the fragment is cut from.</param>
	    /// <param name="s">Value subtracted from every term offset to obtain an index into <paramref name="src"/>.</param>
	    /// <returns>The text of <paramref name="src"/> with tags inserted around each term occurrence.</returns>
	    private String MakeFragment(WeightedFragInfo fragInfo, String src, int s)
	    {
		    // Pair every term offset with its owning sub-info, ordered by ascending start offset.
		    var hits = fragInfo.subInfos
			    .SelectMany(owner => owner.termsOffsets.Select(term => new { Term = term, Owner = owner }))
			    .OrderBy(hit => hit.Term.startOffset);

		    var output = new StringBuilder();
		    var consumed = 0; // index in src of the first character not yet copied

		    foreach (var hit in hits)
		    {
			    var hitStart = hit.Term.startOffset - s;
			    var hitLength = hit.Term.endOffset - hit.Term.startOffset;

			    // Plain text between the previous hit and this one.
			    output.Append(src.Substring(consumed, hitStart - consumed));

			    // The hit itself, surrounded by the tags chosen for its sub-info.
			    output.Append(GetPreTag(hit.Owner.seqnum));
			    output.Append(src.Substring(hitStart, hitLength));
			    output.Append(GetPostTag(hit.Owner.seqnum));

			    consumed = hit.Term.endOffset - s;
		    }

		    // Whatever remains after the last hit.
		    output.Append(src.Substring(consumed));
		    return output.ToString();
	    }