// Return a window of the duplicate-search hit list stored in the session.
// parameters:
//      sessioninfo         session whose DupResultSet holds the hits; its Channels collection supplies the channel
//      lStart              start position inside the hit set (must be >= 0)
//      lCount              number of hits wanted; -1 means "everything from lStart to the end"
//      strBrowseInfoStyle  which information each returned DupSearchResult carries
//                          "cols"      include browse columns
//                          "excludecolsoflowthreshold" do not fetch browse columns for rows whose weight is below
//                                      their threshold. Only effective together with "cols"
//                          "detail"    copy the Detail text of each hit
//      searchresults       [out] the requested hits; null when an error is returned
// return:
//      result.Value    -1  error (result.ErrorInfo holds the message)
//                      other   number of elements in searchresults
public LibraryServerResult GetDupSearchResult(
    SessionInfo sessioninfo,
    long lStart,
    long lCount,
    string strBrowseInfoStyle,
    out DupSearchResult[] searchresults)
{
    string strError = "";
    searchresults = null;

    LibraryServerResult result = new LibraryServerResult();

    RmsChannel channel = sessioninfo.Channels.GetChannel(this.WsUrl);
    if (channel == null)
    {
        strError = "get channel error";
        goto ERROR1;
    }

    DupResultSet dupset = sessioninfo.DupResultSet;
    if (dupset == null)
    {
        strError = "查重结果集不存在";
        goto ERROR1;
    }

    // A negative start would otherwise be passed straight to the result set indexer
    if (lStart < 0)
    {
        strError = "lStart 参数值 " + lStart.ToString() + " 不合法,应当大于或等于 0";
        goto ERROR1;
    }

    dupset.EnsureCreateIndex(getTempFileName);

    int nStart = (int)lStart;
    int nCount = (int)lCount;

    // Clamp the requested count to what is actually available after nStart
    int nAvailable = Math.Max(0, (int)dupset.Count - nStart);
    if (nCount == -1 || nCount > nAvailable)
    {
        nCount = nAvailable;
    }

    bool bDetail = StringUtil.IsInList("detail", strBrowseInfoStyle);
    bool bExcludeCols = StringUtil.IsInList("excludecolsoflowthreshold", strBrowseInfoStyle);
    bool bCols = StringUtil.IsInList("cols", strBrowseInfoStyle);

    List<DupSearchResult> results = new List<DupSearchResult>();

    // pathlist[n] is the path whose browse columns belong to colsTargets[n]
    List<string> pathlist = new List<string>();
    List<DupSearchResult> colsTargets = new List<DupSearchResult>();

    for (int i = 0; i < nCount; i++)
    {
        DupLineItem item = (DupLineItem)dupset[nStart + i];

        DupSearchResult result_item = new DupSearchResult();
        result_item.Path = item.Path;
        result_item.Weight = item.Weight;
        result_item.Threshold = item.Threshold;
        if (bDetail)
        {
            result_item.Detail = item.Detail;
        }
        results.Add(result_item);

        if (bCols == false)
        {
            continue;
        }

        // Rows below their threshold get no browse columns when asked to exclude them
        if (bExcludeCols == true && item.Weight < item.Threshold)
        {
            continue;
        }

        pathlist.Add(item.Path);
        colsTargets.Add(result_item);
    }

    if (pathlist.Count > 0)
    {
        string[] paths = StringUtil.FromListString(pathlist);

        int nRet = channel.GetBrowseRecords(paths,
            "cols",
            out ArrayList aRecord,
            out strError);
        if (nRet == -1)
        {
            strError = "GetBrowseRecords() error: " + strError;
            goto ERROR1;
        }

        // Each requested path needs one record; refuse to index past the end of a short answer
        if (aRecord == null || aRecord.Count < colsTargets.Count)
        {
            strError = "GetBrowseRecords() error: 返回的记录数少于请求的路径数 " + pathlist.Count.ToString();
            goto ERROR1;
        }

        // aRecord[j] is taken as the columns of paths[j] (the style does not include the id)
        for (int j = 0; j < colsTargets.Count; j++)
        {
            colsTargets[j].Cols = (string[])aRecord[j];
        }
    }

    searchresults = new DupSearchResult[results.Count];
    results.CopyTo(searchresults);

    result.Value = searchresults.Length;
    return result;
ERROR1:
    result.Value = -1;
    result.ErrorCode = ErrorCode.SystemError;
    result.ErrorInfo = strError;
    return result;
}
// Search one access point ("from") of one database for one key and collect the hits.
// parameters:
//      channel         channel used for the search; must not be null
//      strDbName       database to search
//      strFrom         access point name
//      strKey          key to look up
//      strSearchStyle  match style placed in the <match> element; null or empty means "exact"
//      nWeight         weight stored in every hit
//      nThreshold      threshold stored in every hit
//      nMax            value placed in the <maxCount> element of the query
//      strExcludeBiblioRecPath record path to leave out of the hits (may be null)
//      dupset          [out] the collected hits. Stays null when 0 is returned.
//                      May be non-null when -1 is returned; the caller owns it and must dispose it
//      strError        [out] error text
// return:
//      -1  error
//      0   not found
//      1   found
int SearchOneFrom(
    // RmsChannelCollection Channels,
    RmsChannel channel,
    string strDbName,
    string strFrom,
    string strKey,
    string strSearchStyle,
    int nWeight,
    int nThreshold,
    long nMax,
    string strExcludeBiblioRecPath,
    out DupResultSet dupset,
    out string strError)
{
    strError = "";
    dupset = null;

    if (String.IsNullOrEmpty(strSearchStyle) == true)
    {
        strSearchStyle = "exact";
    }

    string strQueryXml = "<target list='"
        + StringUtil.GetXmlStringSimple(strDbName + ":" + strFrom)
        + "'><item><word>"
        + StringUtil.GetXmlStringSimple(strKey)
        + "</word><match>" + strSearchStyle + "</match><relation>=</relation><dataType>string</dataType><maxCount>" + nMax.ToString() + "</maxCount></item><lang>zh</lang></target>";

    Debug.Assert(channel != null, "");

    long lRet = channel.DoSearch(strQueryXml,
        "dup",
        "", // strOuputStyle
        out strError);
    if (lRet == -1)
    {
        goto ERROR1;
    }
    if (lRet == 0)
    {
        return 0;   // not found
    }

    long lHitCount = lRet;
    long lStart = 0;
    long lPerCount = Math.Min(50, lHitCount);
    List<string> aPath = null;

    dupset = new DupResultSet();
    dupset.Open(false, getTempFileName);

    // Fetch the server-side result set batch by batch
    for (; ; )
    {
        // TODO: allow the loop to be interrupted
        lRet = channel.DoGetSearchResult(
            "dup",  // strResultSetName
            lStart,
            lPerCount,
            "zh",
            null,   // stop
            out aPath,
            out strError);
        if (lRet == -1)
        {
            goto ERROR1;
        }
        if (lRet == 0)
        {
            strError = "未命中";
            break;
        }

        // An empty batch would never advance lStart; stop instead of looping forever
        if (aPath == null || aPath.Count == 0)
        {
            break;
        }

        for (int i = 0; i < aPath.Count; i++)
        {
            string strPath = aPath[i];

            // Skip the record that started the duplicate check
            if (strPath == strExcludeBiblioRecPath)
            {
                continue;
            }

            DupLineItem item = new DupLineItem
            {
                Path = strPath,
                Weight = nWeight,
                Threshold = nThreshold,
                Detail = BuildDetail(strDbName, strFrom, strKey, strSearchStyle, nWeight)
            };
            dupset.Add(item);
        }

        lStart += aPath.Count;
        if (lStart >= lHitCount || lPerCount <= 0)
        {
            break;
        }
    }

    return 1;
ERROR1:
    return -1;
}
// Run a duplicate check for one bibliographic record.
// For every <database> of the project, every <accessPoint> is searched with the keys
// generated from the origin record; the per-key hit sets are OR-merged (weights of equal
// items are added up by Merge) into one set per database and then into one overall set,
// which is finally sorted by DupResultSetSortStyle.OverThreshold and stored in the session.
// parameters:
//      sessioninfo1            only used to store the resulting DupResultSet; searching is done through channel
//      channel                 channel used for all search operations
//      strOriginBiblioRecPath  path of the record that starts the check
//      strOriginBiblioRecXml   XML of the record that starts the check
//      strProjectName          duplicate-check project name. When empty, the project is looked up in the
//                              <default> element whose origin attribute is the record's database
//      strStyle                "includeoriginrecord": keep the origin record in the result (default is to leave it out).
//                              null is treated as empty
//      strUsedProjectName      [out] the project name actually used
// return:
//      result.Value    -1  error
//                      0   not found
//                      other   number of hit records
public LibraryServerResult SearchDup(
    SessionInfo sessioninfo1,
    RmsChannel channel,
    string strOriginBiblioRecPath,
    string strOriginBiblioRecXml,
    string strProjectName,
    string strStyle,
    out string strUsedProjectName)
{
    string strError = "";
    int nRet = 0;
    strUsedProjectName = "";
    string strDebugInfo = "";

    // Invariant lower-casing keeps the option name recognizable under any thread culture
    strStyle = (strStyle ?? "").ToLowerInvariant();
    bool bIncludeOriginRecord = StringUtil.IsInList("includeoriginrecord", strStyle);

    LibraryServerResult result = new LibraryServerResult();

    // No project name given: find the default project of the origin record's database
    if (String.IsNullOrEmpty(strProjectName) == true)
    {
        if (String.IsNullOrEmpty(strOriginBiblioRecPath) == true)
        {
            strError = "既没有给出查重方案名,也没有给出记录路径,无法进行查重";
            goto ERROR1;
        }

        string strOriginBiblioDbName = ResPath.GetDbName(strOriginBiblioRecPath);

        XmlNode nodeDefault = this.LibraryCfgDom.DocumentElement.SelectSingleNode("//dup/default[@origin='" + strOriginBiblioDbName + "']");
        if (nodeDefault == null)
        {
            strError = "在没有明确指定查重方案名的情况下,本希望通过相关书目库的缺省查重方案名进行查重。但目前系统没有为书目库 '" + strOriginBiblioDbName + "' 定义缺省查重方案名,无法进行查重";
            goto ERROR1;
        }

        string strDefaultProjectName = DomUtil.GetAttr(nodeDefault, "project");
        if (String.IsNullOrEmpty(strDefaultProjectName) == true)
        {
            strError = "书目库 '" + strOriginBiblioDbName + "' 的<default>元素中未定义project属性值";
            goto ERROR1;
        }

        strProjectName = strDefaultProjectName;
    }

    strUsedProjectName = strProjectName;

    // Get the project definition node
    // return:
    //      -1  error
    //      0   not found
    //      1   found
    nRet = GetDupProjectNode(strProjectName,
        out XmlNode nodeProject,
        out strError);
    if (nRet == 0 || nRet == -1)
    {
        goto ERROR1;
    }

    Debug.Assert(nodeProject != null, "");

    DupResultSet alldatabase_set = null;    // hits of all databases
    bool bHandedOver = false;   // true once the session owns alldatabase_set
    try
    {
        XmlNodeList nodeDatabases = nodeProject.SelectNodes("database");

        // One pass per database
        for (int i = 0; i < nodeDatabases.Count; i++)
        {
            XmlNode nodeDatabase = nodeDatabases[i];

            string strDatabaseName = DomUtil.GetAttr(nodeDatabase, "name");
            string strThreshold = DomUtil.GetAttr(nodeDatabase, "threshold");

            // A missing or malformed threshold counts as 0
            int nThreshold;
            if (int.TryParse(strThreshold, out nThreshold) == false)
            {
                nThreshold = 0;
            }

            List<AccessKeyInfo> aKeyLine = null;
            // Simulate key creation to get the list of access keys
            // return:
            //      -1  error
            //      0   succeed
            nRet = GetKeys(
                channel,
                strOriginBiblioRecPath,
                strOriginBiblioRecXml,
                out aKeyLine,
                out strError);
            if (nRet == -1)
            {
                goto ERROR1;
            }

            DupResultSet onedatabase_set = null;    // hits of this database
            try
            {
                XmlNodeList accesspoints = nodeDatabase.SelectNodes("accessPoint");
                for (int j = 0; j < accesspoints.Count; j++)
                {
                    XmlNode accesspoint = accesspoints[j];

                    string strFrom = DomUtil.GetAttr(accesspoint, "name");

                    // Keys that belong to this access point
                    List<string> keys = GetKeyByFrom(aKeyLine, strFrom);
                    if (keys.Count == 0)
                    {
                        continue;
                    }

                    string strWeight = DomUtil.GetAttr(accesspoint, "weight");
                    string strSearchStyle = DomUtil.GetAttr(accesspoint, "searchStyle");

                    // A missing or malformed weight counts as 0
                    int nWeight;
                    if (int.TryParse(strWeight, out nWeight) == false)
                    {
                        nWeight = 0;
                    }

                    for (int k = 0; k < keys.Count; k++)
                    {
                        string strKey = keys[k];
                        if (strKey == "")
                        {
                            continue;
                        }

                        DupResultSet dupset = null;
                        try
                        {
                            // return:
                            //      -1  error
                            //      0   not found
                            //      1   found
                            nRet = SearchOneFrom(
                                channel,
                                strDatabaseName,
                                strFrom,
                                strKey,
                                strSearchStyle,
                                nWeight,
                                nThreshold,
                                5000,
                                (bIncludeOriginRecord == false) ? strOriginBiblioRecPath : null,
                                out dupset,
                                out strError);
                            if (nRet == -1)
                            {
                                // A failed search of one key is skipped, not reported
                                continue;
                            }

                            if (onedatabase_set == null)
                            {
                                // Ownership moves to onedatabase_set; clear dupset so finally does not dispose it
                                onedatabase_set = dupset;
                                dupset = null;
                                continue;
                            }

                            if (nRet == 0)
                            {
                                continue;
                            }

                            Debug.Assert(dupset != null, "");

                            if (onedatabase_set.Sorted == true)
                            {
                                onedatabase_set.EnsureCreateIndex(getTempFileName);
                            }
                            else
                            {
                                onedatabase_set.Sort(getTempFileName);
                            }
                            dupset.Sort(getTempFileName);   // Sort() also makes sure the index exists

                            // OR-merge dupset into the set accumulated so far
                            DupResultSet tempset = new DupResultSet();
                            bool bMerged = false;
                            try
                            {
                                tempset.Open(false, getTempFileName);

                                nRet = DupResultSet.Merge("OR",
                                    onedatabase_set,
                                    dupset,
                                    null,   // targetLeft
                                    tempset,
                                    null,   // targetRight
                                    false,
                                    out strDebugInfo,
                                    out strError);
                                if (nRet == -1)
                                {
                                    goto ERROR1;
                                }
                                bMerged = true;
                            }
                            finally
                            {
                                // Do not leak the temporary set when the merge fails or throws
                                if (bMerged == false)
                                {
                                    tempset.Dispose();
                                }
                            }

                            onedatabase_set.Dispose();
                            onedatabase_set = tempset;
                            onedatabase_set.Sorted = true;  // a merged set comes out in order
                        }
                        finally
                        {
                            if (dupset != null)
                            {
                                dupset.Dispose();
                            }
                        }
                    } // end of k loop
                } // end of j loop

                if (alldatabase_set == null)
                {
                    // Ownership moves to alldatabase_set; clear the local so finally does not dispose it
                    alldatabase_set = onedatabase_set;
                    onedatabase_set = null;
                    continue;
                }

                // OR-merge this database's hits into the overall set
                if (onedatabase_set != null)
                {
                    if (alldatabase_set.Sorted == true)
                    {
                        alldatabase_set.EnsureCreateIndex(getTempFileName);
                    }
                    else
                    {
                        alldatabase_set.Sort(getTempFileName);
                    }
                    onedatabase_set.Sort(getTempFileName);  // Sort() also makes sure the index exists

                    DupResultSet tempset0 = new DupResultSet();
                    bool bMerged0 = false;
                    try
                    {
                        tempset0.Open(false, getTempFileName);

                        nRet = DupResultSet.Merge("OR",
                            alldatabase_set,
                            onedatabase_set,
                            null,   // targetLeft
                            tempset0,
                            null,   // targetRight
                            false,
                            out strDebugInfo,
                            out strError);
                        if (nRet == -1)
                        {
                            goto ERROR1;
                        }
                        bMerged0 = true;
                    }
                    finally
                    {
                        if (bMerged0 == false)
                        {
                            tempset0.Dispose();
                        }
                    }

                    alldatabase_set.Dispose();
                    alldatabase_set = tempset0;
                    alldatabase_set.Sorted = true;
                }
            }
            finally
            {
                if (onedatabase_set != null)
                {
                    onedatabase_set.Dispose();
                }
            }
        }

        // Final ordering by the margin between weight and threshold, convenient for output
        if (alldatabase_set != null)
        {
            alldatabase_set.SortStyle = DupResultSetSortStyle.OverThreshold;
            alldatabase_set.Sort(getTempFileName);
        }

        // Replace the session's previous result set
        if (sessioninfo1.DupResultSet != null)
        {
            sessioninfo1.DupResultSet.Dispose();
        }
        sessioninfo1.DupResultSet = alldatabase_set;
        bHandedOver = true;

        if (alldatabase_set != null)
        {
            result.Value = alldatabase_set.Count;
        }
        else
        {
            result.Value = 0;
        }

        return result;
    }
    finally
    {
        // On any error exit (goto ERROR1 or exception) the partial overall set is released
        if (bHandedOver == false && alldatabase_set != null)
        {
            alldatabase_set.Dispose();
        }
    }
ERROR1:
    result.Value = -1;
    result.ErrorCode = ErrorCode.SystemError;
    result.ErrorInfo = strError;
    return result;
}
// Merge two result sets by walking both forward in parallel and comparing the current
// items with DupLineItem.CompareTo().
// NOTE(review): a forward-only walk like this presumably expects both sources to be
// ordered consistently with CompareTo() - confirm against the callers.
// parameters:
//      strStyle        operation style: OR, AND, SUB (compared case-insensitively).
//                      Only "OR" is treated specially here: every item goes to targetMiddle
//      sourceLeft      left source set; its index must have been created
//      sourceRight     right source set; its index must have been created
//      targetLeft      receives items found only on the left (may be null; must be null for "OR")
//      targetMiddle    receives items found on both sides; the left item is stored with the
//                      right item's Weight added to it (may be null)
//      targetRight     receives items found only on the right (may be null; must be null for "OR")
//      bOutputDebugInfo    whether to append a processing trace to strDebugInfo
//      strDebugInfo    [out] processing trace
//      strError        [out] always "" in this implementation; failures are thrown as exceptions
// return:
//      0   succeed (this implementation never returns -1)
public static int Merge(string strStyle,
    DupResultSet sourceLeft,
    DupResultSet sourceRight,
    DupResultSet targetLeft,
    DupResultSet targetMiddle,
    DupResultSet targetRight,
    bool bOutputDebugInfo,
    out string strDebugInfo,
    out string strError)
{
    strDebugInfo = "";
    strError = "";

    if (sourceLeft.m_streamSmall == null)
    {
        throw new Exception("sourceLeft结果集对象未建索引");
    }

    if (sourceRight.m_streamSmall == null)
    {
        throw new Exception("sourceRight结果集对象未建索引");
    }

    if (bOutputDebugInfo == true)
    {
        strDebugInfo += "strStyle值:" + strStyle + "<br/>";
        strDebugInfo += "sourceLeft结果集:" + sourceLeft.Dump() + "<br/>";
        strDebugInfo += "sourceRight结果集:" + sourceRight.Dump() + "<br/>";
    }

    bool bOr = (String.Compare(strStyle, "OR", true) == 0);
    if (bOr == true && (targetLeft != null || targetRight != null))
    {
        throw new Exception("DpResultSetManager::Merge()中是不是参数用错了?当strStyle参数值为\"OR\"时,targetLeft参数和targetRight无效,值应为null");
    }

    // i / j are the read positions; -1 marks a side that has been exhausted
    int i = 0;
    int j = 0;
    while (true)
    {
        DupLineItem dpRecordLeft = null;
        DupLineItem dpRecordRight = null;

        if (i >= sourceLeft.Count)
        {
            if (bOutputDebugInfo == true)
            {
                strDebugInfo += "i大于等于sourceLeft的个数,将i改为-1<br/>";
            }
            i = -1;
        }
        else if (i != -1)
        {
            try
            {
                dpRecordLeft = (DupLineItem)sourceLeft[i];
                if (bOutputDebugInfo == true)
                {
                    strDebugInfo += "取出sourceLeft集合中第" + Convert.ToString(i) + "个元素,Path为" + dpRecordLeft.Path + "<br/>";
                }
            }
            catch (Exception e)
            {
                throw new Exception("取SourceLeft集合出错:i=" + Convert.ToString(i) + "----Count=" + Convert.ToString(sourceLeft.Count) + ", internel error :" + e.Message + "<br/>", e);
            }
        }

        if (j >= sourceRight.Count)
        {
            if (bOutputDebugInfo == true)
            {
                strDebugInfo += "j大于等于sourceRight的个数,将j改为-1<br/>";
            }
            j = -1;
        }
        else if (j != -1)
        {
            try
            {
                dpRecordRight = (DupLineItem)sourceRight[j];
                if (bOutputDebugInfo == true)
                {
                    strDebugInfo += "取出sourceRight集合中第" + Convert.ToString(j) + "个元素,Path为" + dpRecordRight.Path + "<br/>";
                }
            }
            catch (Exception e)
            {
                // Report the right set's own count and keep the original failure as inner exception
                throw new Exception("取SourceRight集合出错:j=" + Convert.ToString(j) + "----Count=" + Convert.ToString(sourceRight.Count) + ", internel error :" + e.Message + "<br/>", e);
            }
        }

        if (i == -1 && j == -1)
        {
            if (bOutputDebugInfo == true)
            {
                strDebugInfo += "i,j都等于-1跳出<br/>";
            }
            break;
        }

        int ret;
        if (dpRecordLeft == null)
        {
            // Left side exhausted: behave as if left > right so the right side is consumed
            if (bOutputDebugInfo == true)
            {
                strDebugInfo += "dpRecordLeft为null,设ret等于1<br/>";
            }
            ret = 1;
        }
        else if (dpRecordRight == null)
        {
            // Right side exhausted: behave as if left < right so the left side is consumed
            if (bOutputDebugInfo == true)
            {
                strDebugInfo += "dpRecordRight为null,设ret等于-1<br/>";
            }
            ret = -1;
        }
        else
        {
            ret = dpRecordLeft.CompareTo(dpRecordRight);
            if (bOutputDebugInfo == true)
            {
                strDebugInfo += "dpRecordLeft与dpRecordRight均不为null,比较两条记录得到ret等于" + Convert.ToString(ret) + "<br/>";
            }
        }

        // OR: everything lands in targetMiddle
        if (bOr == true && targetMiddle != null)
        {
            if (ret == 0)
            {
                // Either side may be kept, but the weights must be added up
                dpRecordLeft.Weight += dpRecordRight.Weight;
                targetMiddle.Add(dpRecordLeft);
                i++;
                j++;
            }
            else if (ret < 0)
            {
                targetMiddle.Add(dpRecordLeft);
                i++;
            }
            else
            {
                targetMiddle.Add(dpRecordRight);
                j++;
            }
            continue;
        }

        if (ret == 0)
        {
            if (targetMiddle != null)
            {
                if (bOutputDebugInfo == true)
                {
                    strDebugInfo += "ret等于0,加到targetMiddle里面<br/>";
                }

                // Either side may be kept, but the weights must be added up
                dpRecordLeft.Weight += dpRecordRight.Weight;
                targetMiddle.Add(dpRecordLeft);
            }

            // Always advance, even without a middle target; otherwise the loop never ends
            i++;
            j++;
        }
        else if (ret < 0)
        {
            if (bOutputDebugInfo == true)
            {
                strDebugInfo += "ret小于0,加到targetLeft里面<br/>";
            }

            if (targetLeft != null && dpRecordLeft != null)
            {
                targetLeft.Add(dpRecordLeft);
            }
            i++;
        }
        else
        {
            if (bOutputDebugInfo == true)
            {
                strDebugInfo += "ret大于0,加到targetRight里面<br/>";
            }

            if (targetRight != null && dpRecordRight != null)
            {
                targetRight.Add(dpRecordRight);
            }
            j++;
        }
    }

    return 0;
}
// Merge two result sets by walking both forward in parallel and comparing the current
// items with DupLineItem.CompareTo().
// NOTE(review): a forward-only walk like this presumably expects both sources to be
// ordered consistently with CompareTo() - confirm against the callers.
// parameters:
//      strStyle        operation style: OR, AND, SUB (compared case-insensitively).
//                      Only "OR" is treated specially here: every item goes to targetMiddle
//      sourceLeft      left source set; its index must have been created
//      sourceRight     right source set; its index must have been created
//      targetLeft      receives items found only on the left (may be null; must be null for "OR")
//      targetMiddle    receives items found on both sides; the left item is stored with the
//                      right item's Weight added to it (may be null)
//      targetRight     receives items found only on the right (may be null; must be null for "OR")
//      bOutputDebugInfo    whether to append a processing trace to strDebugInfo
//      strDebugInfo    [out] processing trace
//      strError        [out] always "" in this implementation; failures are thrown as exceptions
// return:
//      0   succeed (this implementation never returns -1)
public static int Merge(string strStyle,
    DupResultSet sourceLeft,
    DupResultSet sourceRight,
    DupResultSet targetLeft,
    DupResultSet targetMiddle,
    DupResultSet targetRight,
    bool bOutputDebugInfo,
    out string strDebugInfo,
    out string strError)
{
    strDebugInfo = "";
    strError = "";

    if (sourceLeft.m_streamSmall == null)
    {
        throw new Exception("sourceLeft结果集对象未建索引");
    }

    if (sourceRight.m_streamSmall == null)
    {
        throw new Exception("sourceRight结果集对象未建索引");
    }

    if (bOutputDebugInfo == true)
    {
        strDebugInfo += "strStyle值:" + strStyle + "<br/>";
        strDebugInfo += "sourceLeft结果集:" + sourceLeft.Dump() + "<br/>";
        strDebugInfo += "sourceRight结果集:" + sourceRight.Dump() + "<br/>";
    }

    bool bOr = (String.Compare(strStyle, "OR", true) == 0);
    if (bOr == true && (targetLeft != null || targetRight != null))
    {
        throw new Exception("DpResultSetManager::Merge()中是不是参数用错了?当strStyle参数值为\"OR\"时,targetLeft参数和targetRight无效,值应为null");
    }

    // i / j are the read positions; -1 marks a side that has been exhausted
    int i = 0;
    int j = 0;
    while (true)
    {
        DupLineItem dpRecordLeft = null;
        DupLineItem dpRecordRight = null;

        if (i >= sourceLeft.Count)
        {
            if (bOutputDebugInfo == true)
            {
                strDebugInfo += "i大于等于sourceLeft的个数,将i改为-1<br/>";
            }
            i = -1;
        }
        else if (i != -1)
        {
            try
            {
                dpRecordLeft = (DupLineItem)sourceLeft[i];
                if (bOutputDebugInfo == true)
                {
                    strDebugInfo += "取出sourceLeft集合中第" + Convert.ToString(i) + "个元素,Path为" + dpRecordLeft.Path + "<br/>";
                }
            }
            catch (Exception e)
            {
                throw new Exception("取SourceLeft集合出错:i=" + Convert.ToString(i) + "----Count=" + Convert.ToString(sourceLeft.Count) + ", internel error :" + e.Message + "<br/>", e);
            }
        }

        if (j >= sourceRight.Count)
        {
            if (bOutputDebugInfo == true)
            {
                strDebugInfo += "j大于等于sourceRight的个数,将j改为-1<br/>";
            }
            j = -1;
        }
        else if (j != -1)
        {
            try
            {
                dpRecordRight = (DupLineItem)sourceRight[j];
                if (bOutputDebugInfo == true)
                {
                    strDebugInfo += "取出sourceRight集合中第" + Convert.ToString(j) + "个元素,Path为" + dpRecordRight.Path + "<br/>";
                }
            }
            catch (Exception e)
            {
                // Report the right set's own count and keep the original failure as inner exception
                throw new Exception("取SourceRight集合出错:j=" + Convert.ToString(j) + "----Count=" + Convert.ToString(sourceRight.Count) + ", internel error :" + e.Message + "<br/>", e);
            }
        }

        if (i == -1 && j == -1)
        {
            if (bOutputDebugInfo == true)
            {
                strDebugInfo += "i,j都等于-1跳出<br/>";
            }
            break;
        }

        int ret;
        if (dpRecordLeft == null)
        {
            // Left side exhausted: behave as if left > right so the right side is consumed
            if (bOutputDebugInfo == true)
            {
                strDebugInfo += "dpRecordLeft为null,设ret等于1<br/>";
            }
            ret = 1;
        }
        else if (dpRecordRight == null)
        {
            // Right side exhausted: behave as if left < right so the left side is consumed
            if (bOutputDebugInfo == true)
            {
                strDebugInfo += "dpRecordRight为null,设ret等于-1<br/>";
            }
            ret = -1;
        }
        else
        {
            ret = dpRecordLeft.CompareTo(dpRecordRight);
            if (bOutputDebugInfo == true)
            {
                strDebugInfo += "dpRecordLeft与dpRecordRight均不为null,比较两条记录得到ret等于" + Convert.ToString(ret) + "<br/>";
            }
        }

        // OR: everything lands in targetMiddle
        if (bOr == true && targetMiddle != null)
        {
            if (ret == 0)
            {
                // Either side may be kept, but the weights must be added up
                dpRecordLeft.Weight += dpRecordRight.Weight;
                targetMiddle.Add(dpRecordLeft);
                i++;
                j++;
            }
            else if (ret < 0)
            {
                targetMiddle.Add(dpRecordLeft);
                i++;
            }
            else
            {
                targetMiddle.Add(dpRecordRight);
                j++;
            }
            continue;
        }

        if (ret == 0)
        {
            if (targetMiddle != null)
            {
                if (bOutputDebugInfo == true)
                {
                    strDebugInfo += "ret等于0,加到targetMiddle里面<br/>";
                }

                // Either side may be kept, but the weights must be added up
                dpRecordLeft.Weight += dpRecordRight.Weight;
                targetMiddle.Add(dpRecordLeft);
            }

            // Always advance, even without a middle target; otherwise the loop never ends
            i++;
            j++;
        }
        else if (ret < 0)
        {
            if (bOutputDebugInfo == true)
            {
                strDebugInfo += "ret小于0,加到targetLeft里面<br/>";
            }

            if (targetLeft != null && dpRecordLeft != null)
            {
                targetLeft.Add(dpRecordLeft);
            }
            i++;
        }
        else
        {
            if (bOutputDebugInfo == true)
            {
                strDebugInfo += "ret大于0,加到targetRight里面<br/>";
            }

            if (targetRight != null && dpRecordRight != null)
            {
                targetRight.Add(dpRecordRight);
            }
            j++;
        }
    }

    return 0;
}
// Search one access point ("from") of one database for one key and collect the hits.
// parameters:
//      channel         channel used for the search; must not be null
//      strDbName       database to search
//      strFrom         access point name
//      strKey          key to look up
//      strSearchStyle  match style placed in the <match> element; null or empty means "exact"
//      nWeight         weight stored in every hit
//      nThreshold      threshold stored in every hit
//      nMax            value placed in the <maxCount> element of the query
//      strExcludeBiblioRecPath record path to leave out of the hits (may be null)
//      dupset          [out] the collected hits. Stays null when 0 is returned.
//                      May be non-null when -1 is returned; the caller owns it and must dispose it
//      strError        [out] error text
// return:
//      -1  error
//      0   not found
//      1   found
int SearchOneFrom(
    // RmsChannelCollection Channels,
    RmsChannel channel,
    string strDbName,
    string strFrom,
    string strKey,
    string strSearchStyle,
    int nWeight,
    int nThreshold,
    long nMax,
    string strExcludeBiblioRecPath,
    out DupResultSet dupset,
    out string strError)
{
    strError = "";
    dupset = null;

    if (String.IsNullOrEmpty(strSearchStyle) == true)
        strSearchStyle = "exact";

    string strQueryXml = "<target list='"
        + StringUtil.GetXmlStringSimple(strDbName + ":" + strFrom)
        + "'><item><word>"
        + StringUtil.GetXmlStringSimple(strKey)
        + "</word><match>" + strSearchStyle + "</match><relation>=</relation><dataType>string</dataType><maxCount>" + nMax.ToString() + "</maxCount></item><lang>zh</lang></target>";

    Debug.Assert(channel != null, "");

    long lRet = channel.DoSearch(strQueryXml,
        "dup",
        "", // strOuputStyle
        out strError);
    if (lRet == -1)
        goto ERROR1;
    if (lRet == 0)
        return 0;   // not found

    long lHitCount = lRet;
    long lStart = 0;
    long lPerCount = Math.Min(50, lHitCount);
    List<string> aPath = null;

    dupset = new DupResultSet();
    dupset.Open(false, getTempFileName);

    // Fetch the server-side result set batch by batch
    for (; ; )
    {
        // TODO: allow the loop to be interrupted
        lRet = channel.DoGetSearchResult(
            "dup",  // strResultSetName
            lStart,
            lPerCount,
            "zh",
            null,   // stop
            out aPath,
            out strError);
        if (lRet == -1)
            goto ERROR1;
        if (lRet == 0)
        {
            strError = "未命中";
            break;
        }

        // An empty batch would never advance lStart; stop instead of looping forever
        if (aPath == null || aPath.Count == 0)
            break;

        for (int i = 0; i < aPath.Count; i++)
        {
            string strPath = aPath[i];

            // Skip the record that started the duplicate check
            if (strPath == strExcludeBiblioRecPath)
                continue;

            DupLineItem item = new DupLineItem();
            item.Path = strPath;
            item.Weight = nWeight;
            item.Threshold = nThreshold;
            dupset.Add(item);
        }

        lStart += aPath.Count;
        if (lStart >= lHitCount || lPerCount <= 0)
            break;
    }

    return 1;
ERROR1:
    return -1;
}
// Run a duplicate check for one bibliographic record.
// For every <database> of the project, every <accessPoint> is searched with the keys
// generated from the origin record; the per-key hit sets are OR-merged (weights of equal
// items are added up by Merge) into one set per database and then into one overall set,
// which is finally sorted by DupResultSetSortStyle.OverThreshold and stored in the session.
// parameters:
//      sessioninfo1            only used to store the resulting DupResultSet; searching is done through channel
//      channel                 channel used for all search operations
//      strOriginBiblioRecPath  path of the record that starts the check
//      strOriginBiblioRecXml   XML of the record that starts the check
//      strProjectName          duplicate-check project name. When empty, the project is looked up in the
//                              <default> element whose origin attribute is the record's database
//      strStyle                "includeoriginrecord": keep the origin record in the result (default is to leave it out).
//                              null is treated as empty
//      strUsedProjectName      [out] the project name actually used
// return:
//      result.Value    -1  error
//                      0   not found
//                      other   number of hit records
public LibraryServerResult SearchDup(
    SessionInfo sessioninfo1,
    RmsChannel channel,
    string strOriginBiblioRecPath,
    string strOriginBiblioRecXml,
    string strProjectName,
    string strStyle,
    out string strUsedProjectName)
{
    string strError = "";
    int nRet = 0;
    strUsedProjectName = "";
    string strDebugInfo = "";

    // Invariant lower-casing keeps the option name recognizable under any thread culture
    strStyle = (strStyle ?? "").ToLowerInvariant();
    bool bIncludeOriginRecord = StringUtil.IsInList("includeoriginrecord", strStyle);

    LibraryServerResult result = new LibraryServerResult();

    // No project name given: find the default project of the origin record's database
    if (String.IsNullOrEmpty(strProjectName) == true)
    {
        if (String.IsNullOrEmpty(strOriginBiblioRecPath) == true)
        {
            strError = "既没有给出查重方案名,也没有给出记录路径,无法进行查重";
            goto ERROR1;
        }

        string strOriginBiblioDbName = ResPath.GetDbName(strOriginBiblioRecPath);

        XmlNode nodeDefault = this.LibraryCfgDom.DocumentElement.SelectSingleNode("//dup/default[@origin='" + strOriginBiblioDbName + "']");
        if (nodeDefault == null)
        {
            strError = "在没有明确指定查重方案名的情况下,本希望通过相关书目库的缺省查重方案名进行查重。但目前系统没有为书目库 '" + strOriginBiblioDbName + "' 定义缺省查重方案名,无法进行查重";
            goto ERROR1;
        }

        string strDefaultProjectName = DomUtil.GetAttr(nodeDefault, "project");
        if (String.IsNullOrEmpty(strDefaultProjectName) == true)
        {
            strError = "书目库 '" + strOriginBiblioDbName + "' 的<default>元素中未定义project属性值";
            goto ERROR1;
        }

        strProjectName = strDefaultProjectName;
    }

    strUsedProjectName = strProjectName;

    XmlNode nodeProject = null;
    // Get the project definition node
    // return:
    //      -1  error
    //      0   not found
    //      1   found
    nRet = GetDupProjectNode(strProjectName,
        out nodeProject,
        out strError);
    if (nRet == 0 || nRet == -1)
        goto ERROR1;

    Debug.Assert(nodeProject != null, "");

    DupResultSet alldatabase_set = null;    // hits of all databases
    bool bAllSorted = false;    // true once alldatabase_set is known to be in merge order
    bool bHandedOver = false;   // true once the session owns alldatabase_set
    try
    {
        XmlNodeList nodeDatabases = nodeProject.SelectNodes("database");

        // One pass per database
        for (int i = 0; i < nodeDatabases.Count; i++)
        {
            XmlNode nodeDatabase = nodeDatabases[i];

            string strDatabaseName = DomUtil.GetAttr(nodeDatabase, "name");
            string strThreshold = DomUtil.GetAttr(nodeDatabase, "threshold");

            // A missing or malformed threshold counts as 0
            int nThreshold;
            if (int.TryParse(strThreshold, out nThreshold) == false)
                nThreshold = 0;

            List<AccessKeyInfo> aKeyLine = null;
            // Simulate key creation to get the list of access keys
            // return:
            //      -1  error
            //      0   succeed
            nRet = GetKeys(
                channel,
                strOriginBiblioRecPath,
                strOriginBiblioRecXml,
                out aKeyLine,
                out strError);
            if (nRet == -1)
                goto ERROR1;

            DupResultSet onedatabase_set = null;    // hits of this database
            bool bOneSorted = false;    // true once onedatabase_set is known to be in merge order
            try
            {
                XmlNodeList accesspoints = nodeDatabase.SelectNodes("accessPoint");
                for (int j = 0; j < accesspoints.Count; j++)
                {
                    XmlNode accesspoint = accesspoints[j];

                    string strFrom = DomUtil.GetAttr(accesspoint, "name");

                    // Keys that belong to this access point
                    List<string> keys = GetKeyByFrom(aKeyLine, strFrom);
                    if (keys.Count == 0)
                        continue;

                    string strWeight = DomUtil.GetAttr(accesspoint, "weight");
                    string strSearchStyle = DomUtil.GetAttr(accesspoint, "searchStyle");

                    // A missing or malformed weight counts as 0
                    int nWeight;
                    if (int.TryParse(strWeight, out nWeight) == false)
                        nWeight = 0;

                    for (int k = 0; k < keys.Count; k++)
                    {
                        string strKey = keys[k];
                        if (strKey == "")
                            continue;

                        DupResultSet dupset = null;
                        try
                        {
                            // return:
                            //      -1  error
                            //      0   not found
                            //      1   found
                            nRet = SearchOneFrom(
                                channel,
                                strDatabaseName,
                                strFrom,
                                strKey,
                                strSearchStyle,
                                nWeight,
                                nThreshold,
                                5000,
                                (bIncludeOriginRecord == false) ? strOriginBiblioRecPath : null,
                                out dupset,
                                out strError);
                            if (nRet == -1)
                            {
                                // A failed search of one key is skipped, not reported
                                continue;
                            }

                            if (onedatabase_set == null)
                            {
                                // Ownership moves to onedatabase_set; clear dupset so finally does not dispose it
                                onedatabase_set = dupset;
                                dupset = null;
                                bOneSorted = false;
                                continue;
                            }

                            if (nRet == 0)
                                continue;

                            Debug.Assert(dupset != null, "");

                            // Merge() walks both sets in order, so both must be sorted before merging.
                            // NOTE(review): assumes Sort() with the default SortStyle orders items the
                            // way DupLineItem.CompareTo() does - confirm against DupResultSet
                            if (bOneSorted == false)
                            {
                                onedatabase_set.Sort(getTempFileName);
                                bOneSorted = true;
                            }
                            onedatabase_set.EnsureCreateIndex(getTempFileName);
                            dupset.Sort(getTempFileName);
                            dupset.EnsureCreateIndex(getTempFileName);

                            // OR-merge dupset into the set accumulated so far
                            DupResultSet tempset = new DupResultSet();
                            bool bMerged = false;
                            try
                            {
                                tempset.Open(false, getTempFileName);

                                nRet = DupResultSet.Merge("OR",
                                    onedatabase_set,
                                    dupset,
                                    null,   // targetLeft
                                    tempset,
                                    null,   // targetRight
                                    false,
                                    out strDebugInfo,
                                    out strError);
                                if (nRet == -1)
                                    goto ERROR1;
                                bMerged = true;
                            }
                            finally
                            {
                                // Do not leak the temporary set when the merge fails or throws
                                if (bMerged == false)
                                    tempset.Dispose();
                            }

                            onedatabase_set.Dispose();
                            onedatabase_set = tempset;
                            bOneSorted = true;  // a merged set comes out in order
                        }
                        finally
                        {
                            if (dupset != null)
                                dupset.Dispose();
                        }
                    } // end of k loop
                } // end of j loop

                if (alldatabase_set == null)
                {
                    // Ownership moves to alldatabase_set; clear the local so finally does not dispose it
                    alldatabase_set = onedatabase_set;
                    bAllSorted = bOneSorted;
                    onedatabase_set = null;
                    continue;
                }

                // OR-merge this database's hits into the overall set
                if (onedatabase_set != null)
                {
                    if (bAllSorted == false)
                    {
                        alldatabase_set.Sort(getTempFileName);
                        bAllSorted = true;
                    }
                    alldatabase_set.EnsureCreateIndex(getTempFileName);

                    if (bOneSorted == false)
                    {
                        onedatabase_set.Sort(getTempFileName);
                        bOneSorted = true;
                    }
                    onedatabase_set.EnsureCreateIndex(getTempFileName);

                    DupResultSet tempset0 = new DupResultSet();
                    bool bMerged0 = false;
                    try
                    {
                        tempset0.Open(false, getTempFileName);

                        nRet = DupResultSet.Merge("OR",
                            alldatabase_set,
                            onedatabase_set,
                            null,   // targetLeft
                            tempset0,
                            null,   // targetRight
                            false,
                            out strDebugInfo,
                            out strError);
                        if (nRet == -1)
                            goto ERROR1;
                        bMerged0 = true;
                    }
                    finally
                    {
                        if (bMerged0 == false)
                            tempset0.Dispose();
                    }

                    alldatabase_set.Dispose();
                    alldatabase_set = tempset0;
                    bAllSorted = true;
                }
            }
            finally
            {
                if (onedatabase_set != null)
                    onedatabase_set.Dispose();
            }
        }

        // Final ordering by the margin between weight and threshold, convenient for output
        if (alldatabase_set != null)
        {
            alldatabase_set.SortStyle = DupResultSetSortStyle.OverThreshold;
            alldatabase_set.Sort(getTempFileName);
        }

        // Replace the session's previous result set
        if (sessioninfo1.DupResultSet != null)
            sessioninfo1.DupResultSet.Dispose();
        sessioninfo1.DupResultSet = alldatabase_set;
        bHandedOver = true;

        if (alldatabase_set != null)
            result.Value = alldatabase_set.Count;
        else
            result.Value = 0;

        return result;
    }
    finally
    {
        // On any error exit (goto ERROR1 or exception) the partial overall set is released
        if (bHandedOver == false && alldatabase_set != null)
            alldatabase_set.Dispose();
    }
ERROR1:
    result.Value = -1;
    result.ErrorCode = ErrorCode.SystemError;
    result.ErrorInfo = strError;
    return result;
}