/// <summary>
        /// Plans how the gap between <paramref name="offset"/> and <paramref name="target"/>
        /// is cut into segments: as many groups of four as fit, then the remainder.
        /// A remainder of three is emitted as a group of two followed by a group of one.
        /// </summary>
        /// <param name="target">Position the segments must reach.</param>
        /// <param name="offset">
        /// Position to start from. When <paramref name="target"/> is ahead of it, it equals
        /// <paramref name="target"/> on return; otherwise it is left unchanged.
        /// </param>
        /// <returns>The segments in ascending start order; empty when there is no gap.</returns>
        private static List <RepeateModel> GetFromSpace(int target, ref int offset)
        {
            var segments = new List<RepeateModel>();

            // Consume the gap four positions at a time while at least four remain.
            for (; target - offset >= 4; offset += 4)
            {
                segments.Add(new RepeateModel { StartIndex = offset, Length = 4 });
            }

            int leftover = target - offset;
            if (leftover <= 0)
            {
                // Nothing left over (or the target is not ahead of the offset).
                return(segments);
            }

            if (leftover == 3)
            {
                // Three positions are never kept together here: emit 2 + 1.
                segments.Add(new RepeateModel { StartIndex = offset, Length = 2 });
                offset += 2;
                segments.Add(new RepeateModel { StartIndex = offset, Length = 1 });
            }
            else
            {
                // One or two positions fit into a single segment.
                segments.Add(new RepeateModel { StartIndex = offset, Length = leftover });
            }

            // Every position up to the target is now covered.
            offset = target;
            return(segments);
        }
Beispiel #2
0
        /// <summary>
        /// Plans how the gap between <paramref name="offset"/> and <paramref name="target"/>
        /// is cut into segments: as many groups of four as fit, then one final segment.
        /// A remainder of three is widened to a length of four (it borrows one extra position).
        /// </summary>
        /// <param name="target">Position the segments must reach.</param>
        /// <param name="offset">
        /// Position to start from. It is set to <paramref name="target"/> whenever the two
        /// differ after the groups of four have been emitted.
        /// </param>
        /// <returns>The segments in ascending start order; empty when there is no gap.</returns>
        private static List <RepeateModel> GetFromSpace(int target, ref int offset)
        {
            var segments = new List<RepeateModel>();

            // Consume the gap four positions at a time while at least four remain.
            for (; target - offset >= 4; offset += 4)
            {
                segments.Add(new RepeateModel { StartIndex = offset, Length = 4 });
            }

            if (offset == target)
            {
                // The groups of four covered the gap exactly.
                return(segments);
            }

            // A remainder of three borrows one position and is emitted with length four;
            // any other remainder keeps its own size.
            int leftover = target - offset;
            segments.Add(new RepeateModel
            {
                StartIndex = offset,
                Length     = leftover == 3 ? 4 : leftover
            });

            // The offset ends on the target even when a position was borrowed.
            offset = target;
            return(segments);
        }
        /// <summary>
        /// Counts how many keys in <c>TripCache</c> contain <paramref name="str"/> at position
        /// <paramref name="index"/>, comparing exactly <paramref name="length"/> characters.
        /// </summary>
        /// <param name="str">Text to look for.</param>
        /// <param name="index">Position inside each key where the comparison starts.</param>
        /// <param name="length">Number of key characters compared; also stored as the result's <c>Length</c>.</param>
        /// <returns>A model with <c>StartIndex</c> 0, the given length, and the match count.</returns>
        private RepeateModel GetFrequencyByOffsetAndIndex(string str, int index, int length)
        {
            var tally = new RepeateModel();
            tally.Length     = length;
            tally.StartIndex = 0;

            foreach (var entry in TripCache)
            {
                var key = entry.Key;

                // Keys too short to hold the requested window cannot match.
                if (key.Length < index + length)
                {
                    continue;
                }

                var window = key.Substring(index, length);
                if (str == window)
                {
                    tally.MatchCount += 1;
                }
            }

            return(tally);
        }
        /// <summary>
        /// Splits a string into segments of at most four characters, preferring the split whose
        /// pieces match the most cached keys.
        /// </summary>
        /// <param name="str">Text to split.</param>
        /// <param name="index">Position inside the cached keys at which <paramref name="str"/> begins.</param>
        /// <param name="offset">Value added to the <c>StartIndex</c> of every returned segment.</param>
        /// <param name="order">
        /// For a three-character input, the side a fourth character may be borrowed from;
        /// <c>MatchOrder.None</c> disables borrowing.
        /// </param>
        /// <param name="other">
        /// The four-character borrowed string that is scored when <paramref name="order"/> is not
        /// <c>MatchOrder.None</c>.
        /// </param>
        /// <returns>
        /// The chosen segments. For inputs of five or more characters the order is: segments left of
        /// the best group, segments right of it, then the best group itself.
        /// </returns>
        private List <RepeateModel> GetHighFrequency(string str, int index, int offset = 0, MatchOrder order = MatchOrder.None, string other = default)
        {
            List<RepeateModel> result = new List<RepeateModel>();

            // 0, 1, 2 or 4 characters are kept as one segment.
            if (str.Length < 3 || str.Length == 4)
            {
                RepeateModel whole = GetFrequencyByOffsetAndIndex(str, index, str.Length);
                whole.StartIndex += offset;
                result.Add(whole);
                return(result);
            }

            if (str.Length == 3)
            {
                // Candidate A: split as 2 + 1.
                RepeateModel pairHead   = GetFrequencyByOffsetAndIndex(str.Substring(0, 2), index, 2);
                RepeateModel singleTail = GetFrequencyByOffsetAndIndex(str.Substring(2, 1), index + 2, 1);
                int pairFirstScore = pairHead.MatchCount + singleTail.MatchCount;

                // Candidate B: split as 1 + 2.
                RepeateModel singleHead = GetFrequencyByOffsetAndIndex(str.Substring(0, 1), index, 1);
                RepeateModel pairTail   = GetFrequencyByOffsetAndIndex(str.Substring(1, 2), index + 1, 2);
                int singleFirstScore = singleHead.MatchCount + pairTail.MatchCount;

                // Candidate C: borrow a fourth character from the neighbouring group.
                // A right-to-left borrow starts one character before this string, so it is
                // scored one key position earlier, matching the offset - 1 placement below.
                RepeateModel borrowed = default;
                int borrowScore = 0;
                int borrowIndex = order == MatchOrder.RightToLeft ? index - 1 : index;
                if (order != MatchOrder.None && borrowIndex >= 0)
                {
                    borrowed    = GetFrequencyByOffsetAndIndex(other, borrowIndex, 4);
                    borrowScore = borrowed.MatchCount;
                }

                // Ties favour 2 + 1 over 1 + 2, and a plain split over borrowing.
                bool pairFirst  = pairFirstScore >= singleFirstScore;
                int  splitScore = pairFirst ? pairFirstScore : singleFirstScore;

                if (splitScore >= borrowScore)
                {
                    if (pairFirst)
                    {
                        pairHead.StartIndex += offset;
                        result.Add(pairHead);
                        singleTail.StartIndex += offset + 2;
                        result.Add(singleTail);
                    }
                    else
                    {
                        singleHead.StartIndex += offset;
                        result.Add(singleHead);
                        pairTail.StartIndex += offset + 1;
                        result.Add(pairTail);
                    }
                }
                else if (order == MatchOrder.RightToLeft)
                {
                    // The borrowed group begins one position before this string.
                    borrowed.StartIndex += offset - 1;
                    result.Add(borrowed);
                }
                else if (order == MatchOrder.LeftToRight)
                {
                    borrowed.StartIndex += offset;
                    result.Add(borrowed);
                }

                return(result);
            }

            // Five or more characters: pick the group to anchor on, then recurse on both sides.
            // NOTE(review): GetMaxFrequencyModel is presumably the best-matching four-character
            // window; its StartIndex is treated as relative to str and its width as four.
            RepeateModel best = GetMaxFrequencyModel(str, index);

            // Characters left of the group, if any.
            int leftLength = best.StartIndex;
            if (leftLength > 0)
            {
                string left = str.Substring(0, leftLength);
                if (leftLength == 3)
                {
                    result.AddRange(GetHighFrequency(left, index, offset, MatchOrder.LeftToRight, str.Substring(0, 4)));
                }
                else
                {
                    result.AddRange(GetHighFrequency(left, index, offset));
                }
            }

            // Characters right of the group, if any. The remainder starts immediately after the
            // group, so its text, its key position and its output offset all shift by rightStart.
            int rightStart = best.StartIndex + 4;
            if (rightStart < str.Length)
            {
                string right = str.Substring(rightStart);
                if (right.Length == 3)
                {
                    result.AddRange(GetHighFrequency(right, index + rightStart, offset + rightStart, MatchOrder.RightToLeft, str.Substring(rightStart - 1, 4)));
                }
                else
                {
                    result.AddRange(GetHighFrequency(right, index + rightStart, offset + rightStart));
                }
            }

            // The offset is applied only now because the recursion above needs the raw position.
            best.StartIndex += offset;
            result.Add(best);

            return(result);
        }