예제 #1
0
        /// <summary>
        /// Parses a single continuous string (no whitespace inside it) into a query
        /// against a single field.
        /// </summary>
        /// <param name="field">Name of the field the query is built for; must not be null.</param>
        /// <param name="query">The raw query text. It is trimmed before being tokenized.</param>
        /// <returns>
        /// A <c>TermQuery</c> over <c>new Term(field)</c> when <paramref name="query"/> is null or
        /// blank; otherwise the result of <c>OptimizeQueries</c> applied to the queries produced
        /// by the token tree for <paramref name="query"/>.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="field"/> is null.</exception>
        public static Query Parse(String field, String query)
        {
            if (field == null)
            {
                // ArgumentNullException derives from ArgumentException, so callers that
                // catch the base type keep working.
                throw new ArgumentNullException("field", "parameter \"field\" is null");
            }

            if (query == null || "".Equals(query.Trim()))
            {
                return new TermQuery(new Term(field));
            }

            // Reuse the token tree built by an earlier parse of the same query text, if any.
            TokenBranch root = GetCachedTokenBranch(query);
            if (root == null)
            {
                root = new TokenBranch(null);
                bool tokenizedCompletely = false;

                // Tokenize the trimmed query text and feed every lexeme into the tree.
                using (StringReader input = new StringReader(query.Trim()))
                {
                    IKSegmentation ikSeg = new IKSegmentation(input, isMaxWordLength);
                    try
                    {
                        for (Lexeme lexeme = ikSeg.Next(); lexeme != null; lexeme = ikSeg.Next())
                        {
                            root.Accept(lexeme);
                        }
                        tokenizedCompletely = true;
                    }
                    catch (IOException e)
                    {
                        // Best effort: keep whatever was tokenized so far, but report the
                        // full exception (type, message and stack trace).
                        Console.WriteLine(e);
                    }
                }

                // Only cache complete trees; caching a partial tree would make every later
                // parse of this query silently return the truncated result.
                if (tokenizedCompletely)
                {
                    CachedTokenBranch(query, root);
                }
            }

            return OptimizeQueries(root.ToQueries(field));
        }
예제 #2
0
            /// <summary>
            /// Offers a lexeme to this branch and places it according to the verdict of
            /// <c>CheckAccept</c>: refused, accepted under this branch, or handed to the
            /// adjacent branch.
            /// </summary>
            /// <param name="_lexeme">The lexeme to place.</param>
            /// <returns>
            /// <c>false</c> when <c>CheckAccept</c> answers <c>REFUSED</c>; <c>true</c> in every
            /// other case.
            /// </returns>
            public bool Accept(Lexeme _lexeme)
            {
                int verdict = CheckAccept(_lexeme);

                // REFUSED: this branch cannot take the lexeme.
                if (verdict == REFUSED)
                {
                    return false;
                }

                // TONEXT: the lexeme goes to the adjacent branch, created on first use.
                if (verdict == TONEXT)
                {
                    if (this.nextBranch == null)
                    {
                        this.nextBranch = new TokenBranch(null);
                    }
                    this.nextBranch.Accept(_lexeme);
                    return true;
                }

                // Any verdict other than ACCEPTED needs no further work.
                if (verdict != ACCEPTED)
                {
                    return true;
                }

                // ACCEPTED: existing children get the first chance to take the lexeme.
                bool needsOwnChild = true;
                if (acceptedBranchs == null)
                {
                    acceptedBranchs = new List<TokenBranch>(2);
                }
                else
                {
                    // Every child is offered the lexeme, even after one has already taken it.
                    foreach (TokenBranch child in acceptedBranchs)
                    {
                        if (child.Accept(_lexeme))
                        {
                            needsOwnChild = false;
                        }
                    }
                }

                // No child took it (or there were none): attach it directly below this branch.
                if (needsOwnChild)
                {
                    acceptedBranchs.Add(new TokenBranch(_lexeme));
                }

                // Extend this branch's right border to cover the lexeme.
                if (this.rightBorder < _lexeme.EndPosition)
                {
                    this.rightBorder = _lexeme.EndPosition;
                }

                return true;
            }
예제 #3
0
 /// <summary>
 /// Stores the token tree parsed from a query in the cache returned by
 /// <c>GetTheadLocalCache</c>, keyed by the query text. An existing entry for the
 /// same query is replaced.
 /// </summary>
 /// <param name="query">The query text used as the cache key.</param>
 /// <param name="tb">The root of the token tree built for <paramref name="query"/>.</param>
 private static void CachedTokenBranch(String query, TokenBranch tb)
 {
     Dictionary<String, TokenBranch> keywordCache = GetTheadLocalCache();
     // The indexer overwrites an existing entry; Dictionary.Add would throw
     // ArgumentException if the query were already cached.
     keywordCache[query] = tb;
 }