/// <summary>
/// Builds the split plan for the gap between <paramref name="offset"/> and <paramref name="target"/>.
/// </summary>
/// <param name="target">Target offset, i.e. the end of the gap.</param>
/// <param name="offset">
/// Current offset. It is advanced by 4 for every full segment and set to
/// <paramref name="target"/> when a positive remainder was handled.
/// </param>
/// <returns>
/// Segments covering the gap: as many 4-long segments as fit, followed by the
/// remainder. A remainder of 3 is emitted as a 2-long segment plus a 1-long segment.
/// </returns>
private static List<RepeateModel> GetFromSpace(int target, ref int offset)
{
    var segments = new List<RepeateModel>();

    // Consume the gap in chunks of four while at least four positions remain.
    for (; target - offset >= 4; offset += 4)
    {
        segments.Add(new RepeateModel { StartIndex = offset, Length = 4 });
    }

    int gap = target - offset;
    if (gap <= 0)
    {
        // Nothing left over: the gap was empty or an exact multiple of four.
        return segments;
    }

    if (gap == 3)
    {
        // A leftover of three is split into 2 + 1 rather than kept as one segment.
        segments.Add(new RepeateModel { StartIndex = offset, Length = 2 });
        segments.Add(new RepeateModel { StartIndex = offset + 2, Length = 1 });
    }
    else
    {
        // A leftover of one or two is emitted as a single segment.
        segments.Add(new RepeateModel { StartIndex = offset, Length = gap });
    }

    // The whole gap is now covered.
    offset = target;
    return segments;
}
/// <summary>
/// Builds the split plan for the gap between <paramref name="offset"/> and <paramref name="target"/>.
/// </summary>
/// <param name="target">Target offset, i.e. the end of the gap.</param>
/// <param name="offset">
/// Current offset. It is advanced by 4 for every full segment and set to
/// <paramref name="target"/> whenever it still differs from it afterwards.
/// </param>
/// <returns>
/// As many 4-long segments as fit, followed by one trailing segment when the offset
/// has not reached the target. A remainder of 3 is emitted as a 4-long segment
/// (one position is borrowed past the target).
/// </returns>
/// <remarks>
/// The trailing segment is emitted for any non-zero difference, so a
/// <paramref name="target"/> below <paramref name="offset"/> yields a segment with a
/// negative length. NOTE(review): callers presumably never pass such input - confirm.
/// </remarks>
private static List<RepeateModel> GetFromSpace(int target, ref int offset)
{
    var segments = new List<RepeateModel>();

    // Consume the gap in chunks of four while at least four positions remain.
    for (; target - offset >= 4; offset += 4)
    {
        segments.Add(new RepeateModel { StartIndex = offset, Length = 4 });
    }

    int gap = target - offset;
    if (gap == 0)
    {
        return segments;
    }

    // A leftover of three borrows one extra position so it still forms a group of four;
    // any other leftover keeps its own length.
    int tailLength = gap == 3 ? 4 : gap;
    segments.Add(new RepeateModel { StartIndex = offset, Length = tailLength });

    // The gap is considered fully handled from here on.
    offset = target;
    return segments;
}
/// <summary>
/// Counts how many keys of <c>TripCache</c> contain <paramref name="str"/> at position
/// <paramref name="index"/>.
/// </summary>
/// <param name="str">Text to look for.</param>
/// <param name="index">Zero-based position inside each cache key where the comparison window starts.</param>
/// <param name="length">Number of characters taken from each key for the comparison.</param>
/// <returns>
/// A model with <c>Length</c> set to <paramref name="length"/>, <c>StartIndex</c> set to 0 and
/// <c>MatchCount</c> incremented once for every key whose window equals <paramref name="str"/>.
/// </returns>
private RepeateModel GetFrequencyByOffsetAndIndex(string str, int index, int length)
{
    var frequency = new RepeateModel() { Length = length, StartIndex = 0 };
    int windowEnd = index + length;

    foreach (var entry in TripCache)
    {
        var key = entry.Key;

        // Keys too short to hold the whole window cannot match.
        if (windowEnd > key.Length)
        {
            continue;
        }

        if (key.Substring(index, length) == str)
        {
            frequency.MatchCount += 1;
        }
    }

    return frequency;
}
/// <summary>
/// Splits a string into segments and scores each segment by how often it occurs at the
/// same position in the cached keys.
/// </summary>
/// <param name="str">String to analyse.</param>
/// <param name="index">Position of the first character of <paramref name="str"/> inside the cache keys it is compared against.</param>
/// <param name="offset">Value added to the <c>StartIndex</c> of every returned segment.</param>
/// <param name="order">
/// For a 3-character string: which side the 4-character borrowed candidate extends to.
/// <c>LeftToRight</c> means it starts at the same position as <paramref name="str"/>;
/// <c>RightToLeft</c> means it starts one character earlier. <c>None</c> disables borrowing.
/// </param>
/// <param name="other">The 4-character borrowed candidate; only read when <paramref name="order"/> is not <c>None</c>.</param>
/// <returns>The chosen segments. They are not sorted by position: for long strings the best group is appended after its neighbours.</returns>
private List<RepeateModel> GetHighFrequency(string str, int index, int offset = 0, MatchOrder order = MatchOrder.None, string other = default)
{
    List<RepeateModel> result = new List<RepeateModel>();
    RepeateModel model = default;

    if (str.Length < 3 || str.Length == 4)
    {
        // Strings of length 0-2 or exactly 4 are scored as one single segment.
        model = GetFrequencyByOffsetAndIndex(str, index, str.Length);
        model.StartIndex += offset;
        result.Add(model);
    }
    else if (str.Length == 3)
    {
        // Candidate A: split as 2 + 1.
        var headPair = GetFrequencyByOffsetAndIndex(str.Substring(0, 2), index, 2);
        var tailSingle = GetFrequencyByOffsetAndIndex(str.Substring(2, 1), index + 2, 1);
        int twoOneScore = headPair.MatchCount + tailSingle.MatchCount;

        // Candidate B: split as 1 + 2.
        var headSingle = GetFrequencyByOffsetAndIndex(str.Substring(0, 1), index, 1);
        var tailPair = GetFrequencyByOffsetAndIndex(str.Substring(1, 2), index + 1, 2);
        int oneTwoScore = headSingle.MatchCount + tailPair.MatchCount;

        // Candidate C: borrow one neighbouring character to form a group of four.
        // With RightToLeft the borrowed text begins one character before str, so it has
        // to be looked up one position earlier in the keys (matching the offset - 1 below).
        int borrowScore = 0;
        int borrowIndex = order == MatchOrder.RightToLeft ? index - 1 : index;
        if (order != MatchOrder.None && borrowIndex >= 0)
        {
            model = GetFrequencyByOffsetAndIndex(other, borrowIndex, 4);
            borrowScore = model.MatchCount;
        }

        // 2 + 1 wins ties against 1 + 2, and a split wins ties against borrowing.
        bool preferTwoOne = twoOneScore >= oneTwoScore;
        int splitScore = preferTwoOne ? twoOneScore : oneTwoScore;

        if (splitScore >= borrowScore)
        {
            if (preferTwoOne)
            {
                headPair.StartIndex += offset;
                result.Add(headPair);
                tailSingle.StartIndex += offset + 2;
                result.Add(tailSingle);
            }
            else
            {
                headSingle.StartIndex += offset;
                result.Add(headSingle);
                tailPair.StartIndex += offset + 1;
                result.Add(tailPair);
            }
        }
        else if (order == MatchOrder.RightToLeft)
        {
            // The borrowed group starts one character before this string.
            model.StartIndex += offset - 1;
            result.Add(model);
        }
        else if (order == MatchOrder.LeftToRight)
        {
            model.StartIndex += offset;
            result.Add(model);
        }
    }
    else
    {
        // Five or more characters: pick the best group, then recurse on what is left on
        // each side. The code assumes the returned group is four characters long and that
        // its StartIndex is relative to str - NOTE(review): confirm in GetMaxFrequencyModel.
        model = GetMaxFrequencyModel(str, index);

        if (model.StartIndex > 0)
        {
            // Left remainder: starts where str starts, so index and offset are unchanged.
            var left = str.Substring(0, model.StartIndex);
            if (left.Length == 3)
            {
                result.AddRange(GetHighFrequency(left, index, offset, MatchOrder.LeftToRight, str.Substring(0, 4)));
            }
            else
            {
                result.AddRange(GetHighFrequency(left, index, offset));
            }
        }

        int rightStart = model.StartIndex + 4;
        if (rightStart < str.Length)
        {
            // Right remainder: begins right after the chosen group. Both the key position
            // and the reported offset advance by the same amount (rightStart).
            var right = str.Substring(rightStart);
            if (right.Length == 3)
            {
                result.AddRange(GetHighFrequency(right, index + rightStart, rightStart + offset, MatchOrder.RightToLeft, str.Substring(model.StartIndex + 3, 4)));
            }
            else
            {
                result.AddRange(GetHighFrequency(right, index + rightStart, rightStart + offset));
            }
        }

        model.StartIndex += offset;
        result.Add(model);
    }

    return result;
}