/// <summary>
/// Initializes a repetition expression by storing the supplied arguments in the
/// <c>Expression</c>, <c>Separator</c>, <c>Count</c> and <c>Order</c> members.
/// </summary>
/// <param name="expression">The expression stored in <c>Expression</c>.</param>
/// <param name="separator">The expression stored in <c>Separator</c>; may be <c>null</c>.</param>
/// <param name="count">The value stored in <c>Count</c>.</param>
/// <param name="order">The value stored in <c>Order</c>; defaults to <c>MatchOrder.Greedy</c>.</param>
public RepExpression(Expression expression, Expression? separator, RepCount count, MatchOrder order = MatchOrder.Greedy)
    => (Expression, Separator, Count, Order) = (expression, separator, count, order);
/// <summary>
/// Splits a string into segments and obtains a frequency model for each segment.
/// <para>
/// Strings shorter than 3 characters, or exactly 4 characters long, are looked up as a whole.
/// A 3-character string is split as 2 + 1 or 1 + 2, whichever has the larger summed
/// <c>MatchCount</c>, unless the 4-character string in <paramref name="other"/> scores
/// strictly higher than both splits. Strings of 5 or more characters are split around the
/// 4-character group returned by <c>GetMaxFrequencyModel</c>, and the characters on either
/// side of that group are processed recursively.
/// </para>
/// </summary>
/// <param name="str">The string to analyse.</param>
/// <param name="index">Offset that precedes <paramref name="str"/>; forwarded to the frequency lookups.</param>
/// <param name="offset">Current offset; added to the <c>StartIndex</c> of every model placed in the result.</param>
/// <param name="order">
/// Side from which a 3-character string may borrow one character. When it is
/// <c>MatchOrder.None</c> no borrowing is attempted.
/// </param>
/// <param name="other">
/// The 4-character string used for the borrowing attempt. Only read when
/// <paramref name="str"/> has 3 characters and <paramref name="order"/> is not <c>MatchOrder.None</c>.
/// </param>
/// <returns>
/// The models found. For long strings the order is: left remainder, right remainder,
/// then the best 4-character group, so the list is not sorted by position.
/// </returns>
private List<RepeateModel> GetHighFrequency(string str, int index, int offset = 0, MatchOrder order = MatchOrder.None, string other = default)
{
    List<RepeateModel> result = new List<RepeateModel>();
    RepeateModel model = default;

    if (str.Length < 3 || str.Length == 4)
    {
        // Short enough to be looked up as a single segment.
        model = GetFrequencyByOffsetAndIndex(str, index, str.Length);
        model.StartIndex += offset;
        result.Add(model);
    }
    else if (str.Length == 3)
    {
        // Split the 3 characters as 2 + 1 and sum the match counts.
        var model1 = GetFrequencyByOffsetAndIndex(str.Substring(0, 2), index, 2);
        int tempPriority1 = model1.MatchCount;
        var model2 = GetFrequencyByOffsetAndIndex(str.Substring(2, 1), index + 2, 1);
        tempPriority1 += model2.MatchCount;

        // Split the 3 characters as 1 + 2 and sum the match counts.
        var model3 = GetFrequencyByOffsetAndIndex(str.Substring(0, 1), index, 1);
        int tempPriority2 = model3.MatchCount;
        var model4 = GetFrequencyByOffsetAndIndex(str.Substring(1, 2), index + 1, 2);
        tempPriority2 += model4.MatchCount;

        // Borrow a neighbouring character: score the caller-supplied 4-character string.
        int tempPriority3 = 0;
        if (order != MatchOrder.None)
        {
            model = GetFrequencyByOffsetAndIndex(other, index, 4);
            tempPriority3 = model.MatchCount;
        }

        // Pick the better split (2 + 1 wins ties), then compare it with the borrowed group.
        bool firstSplitWins = tempPriority1 >= tempPriority2;
        int bestSplitPriority = firstSplitWins ? tempPriority1 : tempPriority2;

        if (bestSplitPriority >= tempPriority3)
        {
            if (firstSplitWins)
            {
                model1.StartIndex += offset;
                result.Add(model1);
                model2.StartIndex += offset + 2;
                result.Add(model2);
            }
            else
            {
                model3.StartIndex += offset;
                result.Add(model3);
                model4.StartIndex += offset + 1;
                result.Add(model4);
            }
        }
        else if (order == MatchOrder.RightToLeft)
        {
            // The borrowed character sits one position before this segment.
            model.StartIndex += offset - 1;
            result.Add(model);
        }
        else if (order == MatchOrder.LeftToRight)
        {
            model.StartIndex += offset;
            result.Add(model);
        }
    }
    else
    {
        // Five or more characters: find the best-matching 4-character group.
        model = GetMaxFrequencyModel(str, index);

        // A start index past the origin means characters remain on the left; process them recursively.
        if (model.StartIndex > 0)
        {
            var source = str.Substring(0, model.StartIndex);
            if (source.Length == 3)
            {
                result.AddRange(GetHighFrequency(source, index, offset, MatchOrder.LeftToRight, str.Substring(0, 4)));
            }
            else
            {
                result.AddRange(GetHighFrequency(source, index, offset));
            }
        }

        // Characters remain on the right of the group; process them recursively.
        if (model.StartIndex + 4 < str.Length)
        {
            int tempOffset = model.StartIndex + 4;

            // The remainder starts AFTER the matched group (at tempOffset), matching the
            // offset passed to the recursive call below.
            var source = str.Substring(tempOffset);

            // NOTE(review): index is advanced by 4 regardless of model.StartIndex; if index is
            // meant to track the position of source, this may need to be index + tempOffset.
            // Confirm against GetFrequencyByOffsetAndIndex before changing.
            if (source.Length == 3)
            {
                result.AddRange(GetHighFrequency(source, index + 4, tempOffset + offset, MatchOrder.RightToLeft, str.Substring(model.StartIndex + 3, 4)));
            }
            else
            {
                result.AddRange(GetHighFrequency(source, index + 4, tempOffset + offset));
            }
        }

        // Shift the group itself only after its local StartIndex has been used for slicing.
        model.StartIndex += offset;
        result.Add(model);
    }

    return result;
}
/// <summary>
/// Initializes an optional expression by storing the supplied arguments in the
/// <c>Expression</c> and <c>Order</c> members.
/// </summary>
/// <param name="expression">The expression stored in <c>Expression</c>.</param>
/// <param name="order">The value stored in <c>Order</c>; defaults to <c>MatchOrder.Greedy</c>.</param>
public OptExpression(Expression expression, MatchOrder order = MatchOrder.Greedy)
    => (Expression, Order) = (expression, order);