// Runs one search against a single "from" (access point) of a database and
// collects the paths of the hit records into a newly created DupResultSet.
// parameters:
//      channel                 channel used to issue the search; must not be null
//      strDbName               database name; combined with strFrom as "db:from" in the query
//      strFrom                 access point to search
//      strKey                  search word
//      strSearchStyle          match style written into <match>; null or "" means "exact"
//      nWeight                 weight stored on every produced DupLineItem
//      nThreshold              threshold stored on every produced DupLineItem
//      nMax                    value written into <maxCount> of the query
//      strExcludeBiblioRecPath record path to leave out of the result (the originating record)
//      dupset                  [out] result set; stays null when the search fails or hits nothing
//      strError                [out] error text reported by the channel
// return:
//      -1  error
//      0   not found
//      1   found
int SearchOneFrom(
    RmsChannel channel,
    string strDbName,
    string strFrom,
    string strKey,
    string strSearchStyle,
    int nWeight,
    int nThreshold,
    long nMax,
    string strExcludeBiblioRecPath,
    out DupResultSet dupset,
    out string strError)
{
    strError = "";
    dupset = null;

    // A missing match style (null as well as "") falls back to exact matching,
    // so the query never carries an empty <match> element.
    if (string.IsNullOrEmpty(strSearchStyle))
    {
        strSearchStyle = "exact";
    }

    // Target list and search word are XML-escaped (escaping of the target list dates from 2007/9/14).
    string strQueryXml = "<target list='"
        + StringUtil.GetXmlStringSimple(strDbName + ":" + strFrom)
        + "'><item><word>"
        + StringUtil.GetXmlStringSimple(strKey)
        + "</word><match>" + strSearchStyle + "</match><relation>=</relation><dataType>string</dataType><maxCount>" + nMax.ToString() + "</maxCount></item><lang>zh</lang></target>";

    Debug.Assert(channel != null, "");

    long lRet = channel.DoSearch(strQueryXml,
        "dup",
        "", // strOuputStyle
        out strError);
    if (lRet == -1)
    {
        return -1;
    }

    if (lRet == 0)
    {
        return 0; // not found
    }

    long lHitCount = lRet;
    long lStart = 0;
    long lPerCount = Math.Min(50, lHitCount);

    dupset = new DupResultSet();
    dupset.Open(false, getTempFileName);

    // Fetch the result set page by page and turn every path into a DupLineItem.
    for (; ; )
    {
        // TODO: this loop should be interruptible
        List<string> aPath = null;
        lRet = channel.DoGetSearchResult(
            "dup", // strResultSetName
            lStart,
            lPerCount,
            "zh",
            null, // stop
            out aPath,
            out strError);
        if (lRet == -1)
        {
            return -1;
        }

        if (lRet == 0)
        {
            // NOTE(review): the method still returns 1 here with strError set — kept as in the original.
            strError = "未命中";
            break;
        }

        // An empty (or missing) page cannot advance lStart; stop instead of
        // requesting the same page forever.
        if (aPath == null || aPath.Count == 0)
        {
            break;
        }

        foreach (string strPath in aPath)
        {
            // Skip the path of the record that started the duplicate check.
            if (strPath == strExcludeBiblioRecPath)
            {
                continue;
            }

            DupLineItem item = new DupLineItem
            {
                Path = strPath,
                Weight = nWeight,
                Threshold = nThreshold,
                Detail = BuildDetail(strDbName, strFrom, strKey, strSearchStyle, nWeight)
            };
            dupset.Add(item);
        }

        lStart += aPath.Count;
        if (lStart >= lHitCount || lPerCount <= 0)
        {
            break;
        }
    }

    return 1;
}
// Merges two result sets by walking both with one cursor each and comparing
// the current items with DupLineItem.CompareTo.
// NOTE(review): presumably both sources must be ordered consistently with
// CompareTo for the walk to pair equal items — confirm against callers.
// parameters:
//      strStyle            operation style: OR, AND, SUB (only "OR" is treated specially here)
//      sourceLeft          left source result set; must already be indexed
//      sourceRight         right source result set; must already be indexed
//      targetLeft          receives items found only in the left source; may be null; must be null for OR
//      targetMiddle        receives items found in both sources (for OR: every item); may be null
//      targetRight         receives items found only in the right source; may be null; must be null for OR
//      bOutputDebugInfo    whether to record processing information
//      strDebugInfo        [out] processing information, as an HTML fragment
//      strError            [out] always "" in this implementation
// return:
//      0   success (failures are reported by throwing Exception)
public static int Merge(string strStyle,
    DupResultSet sourceLeft,
    DupResultSet sourceRight,
    DupResultSet targetLeft,
    DupResultSet targetMiddle,
    DupResultSet targetRight,
    bool bOutputDebugInfo,
    out string strDebugInfo,
    out string strError)
{
    strDebugInfo = "";
    strError = "";

    if (sourceLeft.m_streamSmall == null)
    {
        throw new Exception("sourceLeft结果集对象未建索引");
    }

    if (sourceRight.m_streamSmall == null)
    {
        throw new Exception("sourceRight结果集对象未建索引");
    }

    if (bOutputDebugInfo)
    {
        strDebugInfo += "strStyle值:" + strStyle + "<br/>";
        strDebugInfo += "sourceLeft结果集:" + sourceLeft.Dump() + "<br/>";
        strDebugInfo += "sourceRight结果集:" + sourceRight.Dump() + "<br/>";
    }

    // The style does not change during the merge, so evaluate it once.
    bool bOr = String.Compare(strStyle, "OR", true) == 0;
    if (bOr && (targetLeft != null || targetRight != null))
    {
        throw new Exception("DpResultSetManager::Merge()中是不是参数用错了?当strStyle参数值为\"OR\"时,targetLeft参数和targetRight无效,值应为null");
    }

    // Cursors into the two sources; -1 marks a source that is exhausted.
    int i = 0;
    int j = 0;
    while (true)
    {
        DupLineItem dpRecordLeft = null;
        DupLineItem dpRecordRight = null;

        if (i >= sourceLeft.Count)
        {
            if (bOutputDebugInfo)
            {
                strDebugInfo += "i大于等于sourceLeft的个数,将i改为-1<br/>";
            }

            i = -1;
        }
        else if (i != -1)
        {
            try
            {
                dpRecordLeft = (DupLineItem)sourceLeft[i];
                if (bOutputDebugInfo)
                {
                    strDebugInfo += "取出sourceLeft集合中第" + Convert.ToString(i) + "个元素,Path为" + dpRecordLeft.Path + "<br/>";
                }
            }
            catch (Exception e)
            {
                throw new Exception("取SourceLeft集合出错:i=" + Convert.ToString(i) + "----Count=" + Convert.ToString(sourceLeft.Count) + ", internel error :" + e.Message + "<br/>", e);
            }
        }

        if (j >= sourceRight.Count)
        {
            if (bOutputDebugInfo)
            {
                strDebugInfo += "j大于等于sourceRight的个数,将j改为-1<br/>";
            }

            j = -1;
        }
        else if (j != -1)
        {
            try
            {
                dpRecordRight = (DupLineItem)sourceRight[j];
                if (bOutputDebugInfo)
                {
                    strDebugInfo += "取出sourceRight集合中第" + Convert.ToString(j) + "个元素,Path为" + dpRecordRight.Path + "<br/>";
                }
            }
            catch (Exception e)
            {
                // Report the right-hand set's own count and keep the cause, mirroring the left-hand message.
                throw new Exception("取SourceRight集合出错:j=" + Convert.ToString(j) + "----Count=" + Convert.ToString(sourceRight.Count) + ", internel error :" + e.Message + "<br/>", e);
            }
        }

        if (i == -1 && j == -1)
        {
            if (bOutputDebugInfo)
            {
                strDebugInfo += "i,j都等于-1跳出<br/>";
            }

            break;
        }

        // ret < 0: consume the left item; ret > 0: consume the right item; ret == 0: both match.
        int ret;
        if (dpRecordLeft == null)
        {
            if (bOutputDebugInfo)
            {
                strDebugInfo += "dpRecordLeft为null,设ret等于1<br/>";
            }

            ret = 1;
        }
        else if (dpRecordRight == null)
        {
            if (bOutputDebugInfo)
            {
                strDebugInfo += "dpRecordRight为null,设ret等于-1<br/>";
            }

            ret = -1;
        }
        else
        {
            ret = dpRecordLeft.CompareTo(dpRecordRight);
            if (bOutputDebugInfo)
            {
                strDebugInfo += "dpRecordLeft与dpRecordRight均不为null,比较两条记录得到ret等于" + Convert.ToString(ret) + "<br/>";
            }
        }

        if (bOr && targetMiddle != null)
        {
            if (ret == 0)
            {
                // Either side may be kept, but the weights are summed (2007/7/2).
                dpRecordLeft.Weight += dpRecordRight.Weight;
                targetMiddle.Add(dpRecordLeft);
                i++;
                j++;
            }
            else if (ret < 0)
            {
                targetMiddle.Add(dpRecordLeft);
                i++;
            }
            else
            {
                targetMiddle.Add(dpRecordRight);
                j++;
            }

            continue;
        }

        if (ret == 0)
        {
            if (targetMiddle != null)
            {
                if (bOutputDebugInfo)
                {
                    strDebugInfo += "ret等于0,加到targetMiddle里面<br/>";
                }

                // Either side may be kept, but the weights are summed (2007/7/2).
                dpRecordLeft.Weight += dpRecordRight.Weight;
                targetMiddle.Add(dpRecordLeft);
            }

            // Always step past a matching pair, even when the caller passed no
            // targetMiddle; otherwise neither cursor moves and the loop never ends.
            i++;
            j++;
        }
        else if (ret < 0)
        {
            if (bOutputDebugInfo)
            {
                strDebugInfo += "ret小于0,加到targetLeft里面<br/>";
            }

            if (targetLeft != null && dpRecordLeft != null)
            {
                targetLeft.Add(dpRecordLeft);
            }

            i++;
        }
        else
        {
            if (bOutputDebugInfo)
            {
                strDebugInfo += "ret大于0,加到targetRight里面<br/>";
            }

            if (targetRight != null && dpRecordRight != null)
            {
                targetRight.Add(dpRecordRight);
            }

            j++;
        }
    }

    return 0;
}
// Combines two result sets: one cursor runs over each source and the current
// items are ordered with DupLineItem.CompareTo to decide which target gets them.
// NOTE(review): the pairing of equal items presumably relies on both sources
// being ordered consistently with CompareTo — confirm against callers.
// parameters:
//      strStyle            operation style: OR, AND, SUB (only "OR" gets special handling here)
//      sourceLeft          left source result set; must already be indexed
//      sourceRight         right source result set; must already be indexed
//      targetLeft          gets items present only in the left source; may be null; must be null for OR
//      targetMiddle        gets items present in both sources (for OR: every item); may be null
//      targetRight         gets items present only in the right source; may be null; must be null for OR
//      bOutputDebugInfo    whether to record processing information
//      strDebugInfo        [out] processing information, as an HTML fragment
//      strError            [out] always "" in this implementation
// return:
//      0   success (failures are reported by throwing Exception)
public static int Merge(string strStyle,
    DupResultSet sourceLeft,
    DupResultSet sourceRight,
    DupResultSet targetLeft,
    DupResultSet targetMiddle,
    DupResultSet targetRight,
    bool bOutputDebugInfo,
    out string strDebugInfo,
    out string strError)
{
    strDebugInfo = "";
    strError = "";

    if (sourceLeft.m_streamSmall == null)
    {
        throw new Exception("sourceLeft结果集对象未建索引");
    }

    if (sourceRight.m_streamSmall == null)
    {
        throw new Exception("sourceRight结果集对象未建索引");
    }

    if (bOutputDebugInfo)
    {
        strDebugInfo += "strStyle值:" + strStyle + "<br/>";
        strDebugInfo += "sourceLeft结果集:" + sourceLeft.Dump() + "<br/>";
        strDebugInfo += "sourceRight结果集:" + sourceRight.Dump() + "<br/>";
    }

    // Loop-invariant: decide once whether this is an OR merge.
    bool bIsOr = String.Compare(strStyle, "OR", true) == 0;
    if (bIsOr && (targetLeft != null || targetRight != null))
    {
        throw new Exception("DpResultSetManager::Merge()中是不是参数用错了?当strStyle参数值为\"OR\"时,targetLeft参数和targetRight无效,值应为null");
    }

    // Positions in the two sources; a value of -1 means that source is used up.
    int i = 0;
    int j = 0;
    while (true)
    {
        DupLineItem dpRecordLeft = null;
        DupLineItem dpRecordRight = null;

        if (i >= sourceLeft.Count)
        {
            if (bOutputDebugInfo)
            {
                strDebugInfo += "i大于等于sourceLeft的个数,将i改为-1<br/>";
            }

            i = -1;
        }
        else if (i != -1)
        {
            try
            {
                dpRecordLeft = (DupLineItem)sourceLeft[i];
                if (bOutputDebugInfo)
                {
                    strDebugInfo += "取出sourceLeft集合中第" + Convert.ToString(i) + "个元素,Path为" + dpRecordLeft.Path + "<br/>";
                }
            }
            catch (Exception e)
            {
                throw new Exception("取SourceLeft集合出错:i=" + Convert.ToString(i) + "----Count=" + Convert.ToString(sourceLeft.Count) + ", internel error :" + e.Message + "<br/>", e);
            }
        }

        if (j >= sourceRight.Count)
        {
            if (bOutputDebugInfo)
            {
                strDebugInfo += "j大于等于sourceRight的个数,将j改为-1<br/>";
            }

            j = -1;
        }
        else if (j != -1)
        {
            try
            {
                dpRecordRight = (DupLineItem)sourceRight[j];
                if (bOutputDebugInfo)
                {
                    strDebugInfo += "取出sourceRight集合中第" + Convert.ToString(j) + "个元素,Path为" + dpRecordRight.Path + "<br/>";
                }
            }
            catch (Exception e)
            {
                // The message now names the right-hand set's count and carries the cause,
                // matching the left-hand message.
                throw new Exception("取SourceRight集合出错:j=" + Convert.ToString(j) + "----Count=" + Convert.ToString(sourceRight.Count) + ", internel error :" + e.Message + "<br/>", e);
            }
        }

        if (i == -1 && j == -1)
        {
            if (bOutputDebugInfo)
            {
                strDebugInfo += "i,j都等于-1跳出<br/>";
            }

            break;
        }

        // Negative: take the left item; positive: take the right item; zero: the items match.
        int ret;
        if (dpRecordLeft == null)
        {
            if (bOutputDebugInfo)
            {
                strDebugInfo += "dpRecordLeft为null,设ret等于1<br/>";
            }

            ret = 1;
        }
        else if (dpRecordRight == null)
        {
            if (bOutputDebugInfo)
            {
                strDebugInfo += "dpRecordRight为null,设ret等于-1<br/>";
            }

            ret = -1;
        }
        else
        {
            ret = dpRecordLeft.CompareTo(dpRecordRight);
            if (bOutputDebugInfo)
            {
                strDebugInfo += "dpRecordLeft与dpRecordRight均不为null,比较两条记录得到ret等于" + Convert.ToString(ret) + "<br/>";
            }
        }

        if (bIsOr && targetMiddle != null)
        {
            if (ret == 0)
            {
                // Keep one of the two equal items and add up the weights (2007/7/2).
                dpRecordLeft.Weight += dpRecordRight.Weight;
                targetMiddle.Add(dpRecordLeft);
                i++;
                j++;
            }
            else if (ret < 0)
            {
                targetMiddle.Add(dpRecordLeft);
                i++;
            }
            else
            {
                targetMiddle.Add(dpRecordRight);
                j++;
            }

            continue;
        }

        if (ret == 0)
        {
            if (targetMiddle != null)
            {
                if (bOutputDebugInfo)
                {
                    strDebugInfo += "ret等于0,加到targetMiddle里面<br/>";
                }

                // Keep one of the two equal items and add up the weights (2007/7/2).
                dpRecordLeft.Weight += dpRecordRight.Weight;
                targetMiddle.Add(dpRecordLeft);
            }

            // Both cursors must move past a matching pair whether or not a
            // targetMiddle was supplied; without this the loop would spin forever.
            i++;
            j++;
        }
        else if (ret < 0)
        {
            if (bOutputDebugInfo)
            {
                strDebugInfo += "ret小于0,加到targetLeft里面<br/>";
            }

            if (targetLeft != null && dpRecordLeft != null)
            {
                targetLeft.Add(dpRecordLeft);
            }

            i++;
        }
        else
        {
            if (bOutputDebugInfo)
            {
                strDebugInfo += "ret大于0,加到targetRight里面<br/>";
            }

            if (targetRight != null && dpRecordRight != null)
            {
                targetRight.Add(dpRecordRight);
            }

            j++;
        }
    }

    return 0;
}
// Searches a single "from" (access point) of one database and gathers the
// paths of the hit records into a newly created DupResultSet.
// parameters:
//      channel                 channel used to issue the search; must not be null
//      strDbName               database name; combined with strFrom as "db:from" in the query
//      strFrom                 access point to search
//      strKey                  search word
//      strSearchStyle          match style written into <match>; null or "" means "exact"
//      nWeight                 weight stored on every produced DupLineItem
//      nThreshold              threshold stored on every produced DupLineItem
//      nMax                    value written into <maxCount> of the query
//      strExcludeBiblioRecPath record path to leave out of the result (the originating record)
//      dupset                  [out] result set; stays null when the search fails or hits nothing
//      strError                [out] error text reported by the channel
// return:
//      -1  error
//      0   not found
//      1   found
int SearchOneFrom(
    RmsChannel channel,
    string strDbName,
    string strFrom,
    string strKey,
    string strSearchStyle,
    int nWeight,
    int nThreshold,
    long nMax,
    string strExcludeBiblioRecPath,
    out DupResultSet dupset,
    out string strError)
{
    strError = "";
    dupset = null;

    // Treat a null style like an empty one: both select exact matching, so
    // the query never carries an empty <match> element.
    if (string.IsNullOrEmpty(strSearchStyle))
    {
        strSearchStyle = "exact";
    }

    // Target list and search word are XML-escaped (escaping of the target list dates from 2007/9/14).
    string strQueryXml = "<target list='"
        + StringUtil.GetXmlStringSimple(strDbName + ":" + strFrom)
        + "'><item><word>"
        + StringUtil.GetXmlStringSimple(strKey)
        + "</word><match>" + strSearchStyle + "</match><relation>=</relation><dataType>string</dataType><maxCount>" + nMax.ToString() + "</maxCount></item><lang>zh</lang></target>";

    Debug.Assert(channel != null, "");

    long lRet = channel.DoSearch(strQueryXml,
        "dup",
        "", // strOuputStyle
        out strError);
    if (lRet == -1)
    {
        return -1;
    }

    if (lRet == 0)
    {
        return 0; // not found
    }

    long lHitCount = lRet;
    long lStart = 0;
    long lPerCount = Math.Min(50, lHitCount);

    dupset = new DupResultSet();
    dupset.Open(false, getTempFileName);

    // Read the result set one page at a time and record each path.
    for (; ; )
    {
        // TODO: this loop should be interruptible
        List<string> aPath = null;
        lRet = channel.DoGetSearchResult(
            "dup", // strResultSetName
            lStart,
            lPerCount,
            "zh",
            null, // stop
            out aPath,
            out strError);
        if (lRet == -1)
        {
            return -1;
        }

        if (lRet == 0)
        {
            // NOTE(review): the method still returns 1 here with strError set — kept as in the original.
            strError = "未命中";
            break;
        }

        // A page without entries would leave lStart unchanged and repeat the
        // same request endlessly, so stop here.
        if (aPath == null || aPath.Count == 0)
        {
            break;
        }

        foreach (string strPath in aPath)
        {
            // Skip the path of the record that started the duplicate check.
            if (strPath == strExcludeBiblioRecPath)
            {
                continue;
            }

            DupLineItem item = new DupLineItem();
            item.Path = strPath;
            item.Weight = nWeight;
            item.Threshold = nThreshold;
            dupset.Add(item);
        }

        lStart += aPath.Count;
        if (lStart >= lHitCount || lPerCount <= 0)
        {
            break;
        }
    }

    return 1;
}