C# (CSharp) IKAnalyzer.core LexemePath 예제들

프로그래밍 언어: C# (CSharp)

네임스페이스/패키지 이름: IKAnalyzer.core

클래스/타입: LexemePath

hotexamples.com에서의 예제들: 6

C# (CSharp) IKAnalyzer.core LexemePath - 6개의 예제가 발견되었습니다. 다음은 오픈소스 프로젝트에서 추출한 C# (CSharp) IKAnalyzer.core.LexemePath의 실제 사용 예제 중 평가가 가장 높은 것들입니다. 각 예제를 평가해 주시면 예제의 품질 향상에 도움이 됩니다.

자주 사용되는 메소드들

보기 숨기기

CheckOverlap(1)

Copy(1)

ExpandNonOverlapLexeme(1)

ExpandOverlapLexeme(1)

예제 #1

파일 보기

파일: IKArbitrator.cs 프로젝트: SilentCC/IKAnalyzer

 /// <summary>
 /// Rolls the lexeme chain back until it can accept the given lexeme.
 /// </summary>
 /// <param name="lex">Lexeme passed to <c>CheckOverlap</c> on every iteration.</param>
 /// <param name="path">Chain whose tail is removed for as long as <c>CheckOverlap(lex)</c> returns true.</param>
 private void BackPath(Lexeme lex, LexemePath path)
 {
     for (;;)
     {
         // Stop as soon as the chain no longer reports an overlap with lex.
         if (!path.CheckOverlap(lex))
         {
             return;
         }

         path.RemoveTail();
     }
 }

예제 #2

파일 보기

파일: LexemePath.cs 프로젝트: SilentCC/IKAnalyzer

        /// <summary>
        /// Creates a copy of the current lexeme chain.
        /// </summary>
        /// <returns>
        /// A new <c>LexemePath</c> that carries this chain's begin, end and length values
        /// and into which every value reachable from <c>Head</c> has been inserted.
        /// </returns>
        public LexemePath Copy()
        {
            var clone = new LexemePath();

            // Bookkeeping fields are assigned before any cell value is inserted.
            clone._begin  = this._begin;
            clone._end    = this._end;
            clone._length = this.Length;

            // Walk the cells from the head; stop at the first missing cell or missing value.
            for (var node = Head; node != null && node.V != null; node = node.Next)
            {
                clone.Insert(node.V);
            }

            return clone;
        }

예제 #3

파일 보기

파일: IKArbitrator.cs 프로젝트: SilentCC/IKAnalyzer

        /// <summary>
        /// Segmentation ambiguity handling: groups the raw lexemes of the context into
        /// overlapping chains and hands each finished chain back to the context.
        /// </summary>
        /// <param name="context">Supplies the raw lexemes and receives the resulting chains.</param>
        /// <param name="useSmart">
        /// When false, every chain is added to the context as-is; when true, chains whose
        /// <c>Size</c> is not 1 are first passed through <c>Judge</c>.
        /// </param>
        public void Process(AnalyzeContext context, bool useSmart)
        {
            var rawLexemes  = context.RawLexemes;       // raw lexemes
            var lex         = rawLexemes.PollFirst();
            var overlapPath = new LexemePath();

            for (; lex != null; lex = rawLexemes.PollFirst())
            {
                // The lexeme was accepted into the current chain; move on to the next one.
                if (overlapPath.ExpandOverlapLexeme(lex))
                {
                    continue;
                }

                // lex was rejected, so the current chain is finished.
                // A single-lexeme chain, or non-smart mode, needs no disambiguation and is
                // added directly; otherwise the chain is disambiguated first.
                context.AddLexemePath(
                    overlapPath.Size == 1 || !useSmart
                        ? overlapPath
                        : Judge(overlapPath.Head, overlapPath.PathSpan));

                // Start a fresh chain with the lexeme that was rejected.
                overlapPath = new LexemePath();
                overlapPath.ExpandOverlapLexeme(lex);
            }

            // After the loop, handle the last chain that is still pending.
            context.AddLexemePath(
                overlapPath.Size == 1 || !useSmart
                    ? overlapPath
                    : Judge(overlapPath.Head, overlapPath.PathSpan));
        }

예제 #4

파일 보기

파일: IKArbitrator.cs 프로젝트: SilentCC/IKAnalyzer

        /// <summary>
        /// Ambiguity resolution: builds candidate conflict-free chains starting from
        /// <paramref name="cell"/> and returns the one that sorts first.
        /// </summary>
        /// <param name="cell">First cell of the overlapping chain to resolve.</param>
        /// <param name="fullTextLen">Not read by this method body.</param>
        /// <returns>The first element of the sorted candidate set.</returns>
        public LexemePath Judge(QuickSortSet <Lexeme> .Cell cell, int fullTextLen)
        {
            // Candidate set of conflict-free chains, ordered by LexemePath's comparison.
            var candidates = new SortedSet <LexemePath>();
            // Working chain that holds the currently selected non-conflicting lexemes.
            var working   = new LexemePath();
            var conflicts = ForwardPath(cell, working);

            candidates.Add(working.Copy());

            while (conflicts.Count != 0)
            {
                var conflictCell = conflicts.Pop();

                // Roll the working chain back so it can take the conflicting lexeme.
                BackPath(conflictCell.V, working);
                // Starting at the conflict position, collect non-conflicting lexemes again.
                ForwardPath(conflictCell, working);

                candidates.Add(working.Copy());
            }

            // The earlier a candidate ranks, the better the segmentation.
            return candidates.First();
        }

예제 #5

파일 보기

파일: LexemePath.cs 프로젝트: SilentCC/IKAnalyzer

        /// <summary>
        /// Orders lexeme chains so that the preferred segmentation sorts first.
        /// The criteria are applied in sequence; the first one that differs decides.
        /// </summary>
        /// <param name="other">Chain to compare with; may be null.</param>
        /// <returns>
        /// -1 when this chain ranks before <paramref name="other"/>, 1 when it ranks after
        /// (or when <paramref name="other"/> is null), and 0 when all criteria are equal.
        /// </returns>
        public int CompareTo(LexemePath other)
        {
            // IComparable<T> contract: any instance compares greater than null.
            // Without this guard the field accesses below throw NullReferenceException.
            if (object.ReferenceEquals(other, null))
            {
                return(1);
            }

            if (this._length > other._length)
            {
                return(-1);                                 // longer effective text length is better
            }
            if (this._length < other._length)
            {
                return(1);
            }

            if (Size < other.Size)
            {
                return(-1);                                 // fewer lexemes is better
            }
            if (Size > other.Size)
            {
                return(1);
            }

            if (PathSpan > other.PathSpan)
            {
                return(-1);                                 // larger path span is better
            }
            if (PathSpan < other.PathSpan)
            {
                return(1);
            }

            if (this._end > other._end)
            {
                return(-1);                                 // statistically, backward segmentation is more likely correct than forward, so a later end position is better
            }
            if (this._end < other._end)
            {
                return(1);
            }

            var x_weight_1 = this.GetXWeight();
            var x_weight_2 = other.GetXWeight();

            if (x_weight_1 > x_weight_2)
            {
                return(-1);                              // more evenly sized lexemes are better
            }
            if (x_weight_1 < x_weight_2)
            {
                return(1);
            }

            var p_weight_1 = this.GetPWeight();
            var p_weight_2 = other.GetPWeight();

            if (p_weight_1 > p_weight_2)
            {
                return(-1);                             // lexeme position weight comparison
            }
            if (p_weight_1 < p_weight_2)
            {
                return(1);
            }

            return(0);
        }

예제 #6

파일 보기

파일: IKArbitrator.cs 프로젝트: SilentCC/IKAnalyzer

        /// <summary>
        /// Forward traversal: lexemes that <paramref name="path"/> accepts are added to it,
        /// and cells whose lexeme is rejected are pushed onto a stack.
        /// </summary>
        /// <param name="cell">Cell at which the traversal starts.</param>
        /// <param name="path">Chain that is offered every visited lexeme.</param>
        /// <returns>Stack of the cells whose lexeme was rejected.</returns>
        private Stack <QuickSortSet <Lexeme> .Cell> ForwardPath(QuickSortSet <Lexeme> .Cell cell, LexemePath path)
        {
            // Cells whose lexeme was rejected by the chain.
            var conflicts = new Stack <QuickSortSet <Lexeme> .Cell>();

            for (var node = cell; node != null && node.V != null; node = node.Next)
            {
                // Accepted by the chain: nothing to record for this cell.
                if (path.ExpandNonOverlapLexeme(node.V))
                {
                    continue;
                }

                // The lexeme was not added to the chain; remember the cell for later.
                conflicts.Push(node);
            }

            return conflicts;
        }