Beispiel #1
0
 /// <summary>
 /// Creates the expression by storing each argument in the property of the same name.
 /// </summary>
 /// <param name="expression">Value assigned to <c>Expression</c>.</param>
 /// <param name="separator">Value assigned to <c>Separator</c>; may be <c>null</c>.</param>
 /// <param name="count">Value assigned to <c>Count</c>.</param>
 /// <param name="order">Value assigned to <c>Order</c>; defaults to <c>MatchOrder.Greedy</c>.</param>
 public RepExpression(Expression expression, Expression? separator, RepCount count, MatchOrder order = MatchOrder.Greedy)
     => (Expression, Separator, Count, Order) = (expression, separator, count, order);
        /// <summary>
        /// Breaks a string into segments and returns one <c>RepeateModel</c> per segment,
        /// choosing between candidate segmentations by comparing their <c>MatchCount</c> values.
        /// </summary>
        /// <param name="str">The string to analyze.</param>
        /// <param name="index">Number of characters that precede <paramref name="str"/>; forwarded to the frequency lookups.</param>
        /// <param name="offset">Value added to the <c>StartIndex</c> of the models produced for <paramref name="str"/>.</param>
        /// <param name="order">
        /// Only consulted for three-character input: indicates which side a fourth character was borrowed from.
        /// <c>MatchOrder.None</c> disables the borrowed lookup.
        /// </param>
        /// <param name="other">The four-character string looked up when <paramref name="order"/> is not <c>MatchOrder.None</c>.</param>
        /// <returns>
        /// The collected models with their <c>StartIndex</c> adjusted by <paramref name="offset"/>.
        /// For input longer than four characters the order is: left remainder, right remainder, then the central group.
        /// </returns>
        private List<RepeateModel> GetHighFrequency(string str, int index, int offset = 0, MatchOrder order = MatchOrder.None, string other = default)
        {
            List<RepeateModel> result = new List<RepeateModel>();
            RepeateModel       model  = default;

            if (str.Length < 3 || str.Length == 4)
            {
                // Lengths 0, 1, 2 and 4 are looked up as one single segment.
                model             = GetFrequencyByOffsetAndIndex(str, index, str.Length);
                model.StartIndex += offset;
                result.Add(model);
            }
            else if (str.Length == 3)
            {
                // Candidate A: split the three characters as 2 + 1 and sum the match counts.
                var headPair           = GetFrequencyByOffsetAndIndex(str.Substring(0, 2), index, 2);
                var tailSingle         = GetFrequencyByOffsetAndIndex(str.Substring(2, 1), index + 2, 1);
                int twoPlusOnePriority = headPair.MatchCount + tailSingle.MatchCount;

                // Candidate B: split the three characters as 1 + 2 and sum the match counts.
                var headSingle         = GetFrequencyByOffsetAndIndex(str.Substring(0, 1), index, 1);
                var tailPair           = GetFrequencyByOffsetAndIndex(str.Substring(1, 2), index + 1, 2);
                int onePlusTwoPriority = headSingle.MatchCount + tailPair.MatchCount;

                // Candidate C: borrow one neighbouring character and look up the resulting four characters.
                int borrowedPriority = 0;
                if (order != MatchOrder.None)
                {
                    // NOTE(review): for RightToLeft the borrowed string starts one character before str
                    // (see the "offset - 1" adjustment below), yet the lookup still passes index unchanged — confirm.
                    model            = GetFrequencyByOffsetAndIndex(other, index, 4);
                    borrowedPriority = model.MatchCount;
                }

                // Ties favour 2 + 1 over 1 + 2, and either split over the borrowed candidate.
                bool preferTwoPlusOne  = twoPlusOnePriority >= onePlusTwoPriority;
                int  bestSplitPriority = preferTwoPlusOne ? twoPlusOnePriority : onePlusTwoPriority;

                if (bestSplitPriority >= borrowedPriority)
                {
                    if (preferTwoPlusOne)
                    {
                        headPair.StartIndex += offset;
                        result.Add(headPair);
                        tailSingle.StartIndex += offset + 2;
                        result.Add(tailSingle);
                    }
                    else
                    {
                        headSingle.StartIndex += offset;
                        result.Add(headSingle);
                        tailPair.StartIndex += offset + 1;
                        result.Add(tailPair);
                    }
                }
                else if (order == MatchOrder.RightToLeft)
                {
                    // The borrowed character sits to the left of str, so the segment starts one position earlier.
                    model.StartIndex += offset - 1;
                    result.Add(model);
                }
                else if (order == MatchOrder.LeftToRight)
                {
                    // The borrowed character sits to the right of str, so the start position is unchanged.
                    model.StartIndex += offset;
                    result.Add(model);
                }
            }
            else
            {
                // Five or more characters: pick the four-character group with the most matches
                // (per the original comment on this call), then recurse on whatever is left on either side.
                model = GetMaxFrequencyModel(str, index);

                // A start position greater than zero means characters remain to the left of the group.
                if (model.StartIndex > 0)
                {
                    var leftPart = str.Substring(0, model.StartIndex);
                    if (leftPart.Length == 3)
                    {
                        // Offer the three left characters plus the first character of the group as a borrowed candidate.
                        result.AddRange(GetHighFrequency(leftPart, index, offset, MatchOrder.LeftToRight, str.Substring(0, 4)));
                    }
                    else
                    {
                        result.AddRange(GetHighFrequency(leftPart, index, offset));
                    }
                }

                // Characters after the four-character group form the right remainder.
                int rightStart = model.StartIndex + 4;
                if (rightStart < str.Length)
                {
                    // Fix: the remainder begins right after the group (rightStart), not at the group's own start.
                    var rightPart = str.Substring(rightStart);

                    // NOTE(review): "index + 4" ignores model.StartIndex; if index is the absolute position of the
                    // substring being analysed this should probably be index + rightStart — confirm against the helpers.
                    if (rightPart.Length == 3)
                    {
                        // Offer the last character of the group plus the three right characters as a borrowed candidate.
                        result.AddRange(GetHighFrequency(rightPart, index + 4, rightStart + offset, MatchOrder.RightToLeft, str.Substring(model.StartIndex + 3, 4)));
                    }
                    else
                    {
                        result.AddRange(GetHighFrequency(rightPart, index + 4, rightStart + offset));
                    }
                }

                model.StartIndex += offset;
                result.Add(model);
            }

            return result;
        }
 /// <summary>
 /// Creates the expression by storing each argument in the property of the same name.
 /// </summary>
 /// <param name="expression">Value assigned to <c>Expression</c>.</param>
 /// <param name="order">Value assigned to <c>Order</c>; defaults to <c>MatchOrder.Greedy</c>.</param>
 public OptExpression(Expression expression, MatchOrder order = MatchOrder.Greedy)
     => (Expression, Order) = (expression, order);