/// <summary>
/// Parses a single continuous string (one that contains no space characters)
/// into a query against a single field.
/// </summary>
/// <param name="field">Name of the field to query. Must not be null.</param>
/// <param name="query">
/// Text to segment. When it is null or consists only of whitespace, a
/// <c>TermQuery</c> built from <c>new Term(field)</c> is returned instead.
/// </param>
/// <returns>
/// The result of <c>OptimizeQueries</c> applied to the queries produced by the
/// token branch tree for <paramref name="query"/>.
/// </returns>
/// <exception cref="ArgumentNullException">
/// Thrown when <paramref name="field"/> is null. This type derives from
/// <see cref="ArgumentException"/>, so existing handlers keep working.
/// </exception>
public static Query Parse(String field, String query)
{
    if (field == null)
    {
        throw new ArgumentNullException("field", "parameter \"field\" is null");
    }

    // Trim once and reuse the result for both the emptiness check and the segmenter input.
    String trimmedQuery = (query == null) ? "" : query.Trim();
    if (trimmedQuery.Length == 0)
    {
        return new TermQuery(new Term(field));
    }

    // Reuse the TokenBranch produced by an earlier parse of the same query, if cached.
    // The cache key is the caller's original (untrimmed) query string.
    TokenBranch root = GetCachedTokenBranch(query);
    if (root == null)
    {
        root = new TokenBranch(null);
        bool segmentedCompletely = false;

        // Segment the query text; the reader is released as soon as segmentation ends.
        using (StringReader input = new StringReader(trimmedQuery))
        {
            IKSegmentation ikSeg = new IKSegmentation(input, isMaxWordLength);
            try
            {
                for (Lexeme lexeme = ikSeg.Next(); lexeme != null; lexeme = ikSeg.Next())
                {
                    // Place the lexeme into the branch tree.
                    root.Accept(lexeme);
                }
                segmentedCompletely = true;
            }
            catch (IOException e)
            {
                // Best effort: report the failure and continue with whatever was segmented so far.
                Console.WriteLine(e.StackTrace);
            }
        }

        // Only cache a fully built tree, so a failed segmentation is retried on the
        // next call instead of being served from the cache.
        if (segmentedCompletely)
        {
            CachedTokenBranch(query, root);
        }
    }

    return OptimizeQueries(root.ToQueries(field));
}
/// <summary>
/// Offers a lexeme to this branch and places it according to the verdict
/// returned by <c>CheckAccept</c>:
/// REFUSED  - the lexeme is not taken by this branch;
/// ACCEPTED - the lexeme is stored under this branch;
/// TONEXT   - the lexeme is handed to the adjacent branch.
/// </summary>
/// <param name="_lexeme">Lexeme to place.</param>
/// <returns>
/// <c>false</c> when <c>CheckAccept</c> reports REFUSED; <c>true</c> in every other case.
/// </returns>
public bool Accept(Lexeme _lexeme)
{
    int verdict = CheckAccept(_lexeme);

    if (verdict == REFUSED)
    {
        return false;
    }

    if (verdict == TONEXT)
    {
        // Hand the lexeme to the adjacent branch, creating an empty,
        // non-overlapping one first if none exists yet.
        if (this.nextBranch == null)
        {
            this.nextBranch = new TokenBranch(null);
        }
        this.nextBranch.Accept(_lexeme);
        return true;
    }

    if (verdict != ACCEPTED)
    {
        // Any other verdict changes nothing and still reports success.
        return true;
    }

    // ACCEPTED: existing child branches get the first chance to take the lexeme.
    // Every child is offered the lexeme, even after one has already taken it.
    bool needsNewChild = true;
    if (acceptedBranchs == null)
    {
        acceptedBranchs = new List<TokenBranch>(2);
    }
    else
    {
        foreach (TokenBranch child in acceptedBranchs)
        {
            if (child.Accept(_lexeme))
            {
                needsNewChild = false;
            }
        }
    }

    // No child took it (or there were none): store it directly under this branch.
    if (needsNewChild)
    {
        acceptedBranchs.Add(new TokenBranch(_lexeme));
    }

    // Extend this branch's right border to cover the lexeme.
    if (this.rightBorder < _lexeme.EndPosition)
    {
        this.rightBorder = _lexeme.EndPosition;
    }

    return true;
}
/// <summary>
/// Stores the token branch tree built for a query in the dictionary returned
/// by <c>GetTheadLocalCache</c>, replacing any entry already stored under the
/// same query string.
/// </summary>
/// <param name="query">Query string used as the cache key.</param>
/// <param name="tb">Token branch tree to cache for <paramref name="query"/>.</param>
private static void CachedTokenBranch(String query, TokenBranch tb)
{
    Dictionary<String, TokenBranch> keywordCache = GetTheadLocalCache();
    // Assign through the indexer rather than Add: Add throws ArgumentException
    // when the key is already present, whereas a cache store should overwrite.
    keywordCache[query] = tb;
}