// Implements IComparable.CompareTo(): orders items by Path, ascending.
// parameters:
//      obj     the object to compare with this instance; expected to be a DupLineItem
// return:
//      <0  this instance precedes obj
//      0   this instance occupies the same position as obj
//      >0  this instance follows obj (also returned when obj is null)
// exceptions:
//      InvalidCastException    obj is not a DupLineItem
public override int CompareTo(object obj)
{
    // Per the IComparable contract, any instance compares greater than null.
    // Without this guard the Path access below would throw NullReferenceException.
    if (obj == null)
    {
        return 1;
    }

    DupLineItem item = (DupLineItem)obj;

    // Smaller path first. String.Compare(string, string) is culture-sensitive.
    return String.Compare(this.Path, item.Path);
}
// Orders items by Weight, descending; items with equal weight are ordered by Path, ascending.
// parameters:
//      obj     the object to compare with this instance; must be a DupLineItem
// return:
//      <0  this instance sorts before obj
//      0   both have the same weight and path
//      >0  this instance sorts after obj
public int CompareWeightTo(object obj)
{
    DupLineItem item = (DupLineItem)obj;

    // Larger weight first. The operands are swapped to get descending order, and
    // CompareTo is used instead of subtraction so that extreme values cannot
    // overflow and flip the sign of the result.
    int byWeight = item.Weight.CompareTo(this.Weight);
    if (byWeight != 0)
    {
        return byWeight;
    }

    // Same weight: fall back to the path, smaller first.
    return String.Compare(this.Path, item.Path);
}
// Orders items by their margin, descending. The margin is Weight minus Threshold.
// Items with the same margin are ordered by Path, ascending.
// parameters:
//      obj     the object to compare with this instance; must be a DupLineItem
// return:
//      <0  this instance sorts before obj
//      0   both have the same margin and path
//      >0  this instance sorts after obj
public int CompareOverThresholdTo(object obj)
{
    DupLineItem item = (DupLineItem)obj;

    // The margins are computed in 64 bits so that neither the subtraction nor the
    // comparison can overflow for extreme weight/threshold values.
    long over1 = (long)this.Weight - this.Threshold;
    long over2 = (long)item.Weight - item.Threshold;

    // Larger margin first (operands swapped for descending order).
    int byMargin = over2.CompareTo(over1);
    if (byMargin != 0)
    {
        return byMargin;
    }

    // Same margin: fall back to the path, smaller first.
    return String.Compare(this.Path, item.Path);
}
// Compares two items identified by their offsets, using the ordering selected
// by SortStyle, so that the set can be sorted in several different ways.
// A negative offset sorts before a non-negative one; two negative offsets compare equal.
public override int Compare(long lPtr1, long lPtr2)
{
    bool firstNegative = lPtr1 < 0;
    bool secondNegative = lPtr2 < 0;

    if (firstNegative || secondNegative)
    {
        if (firstNegative && secondNegative)
        {
            return 0;
        }

        return firstNegative ? -1 : 1;
    }

    DupLineItem left = (DupLineItem)GetCompareItemByOffset(lPtr1);
    DupLineItem right = (DupLineItem)GetCompareItemByOffset(lPtr2);

    DupResultSetSortStyle style = this.SortStyle;

    if (style == DupResultSetSortStyle.Path)
    {
        return left.CompareTo(right);
    }

    if (style == DupResultSetSortStyle.Weight)
    {
        return left.CompareWeightTo(right);
    }

    if (style == DupResultSetSortStyle.OverThreshold)
    {
        return left.CompareOverThresholdTo(right);
    }

    // None of the known styles matched.
    Debug.Assert(false, "invalid sort style");
    return 0;
}
// Runs one search against a single "from" of one database and collects the hits
// into a new DupResultSet.
// parameters:
//      channel                 channel used to run the search; must not be null
//      strDbName               database name; combined with strFrom as "db:from" in the query target
//      strFrom                 the "from" to search
//      strKey                  the search word
//      strSearchStyle          match style written into the query; "" is treated as "exact"
//      nWeight                 weight stored on every item created from a hit
//      nThreshold              threshold stored on every item created from a hit
//      nMax                    value written into the query's maxCount element
//      strExcludeBiblioRecPath record path to leave out of the result
//      dupset                  [out] the collected items; null when nothing was found
//                              or the initial search failed
//      strError                [out] error message
// return:
//      -1  error
//      0   not found
//      1   found
int SearchOneFrom(
    RmsChannel channel,
    string strDbName,
    string strFrom,
    string strKey,
    string strSearchStyle,
    int nWeight,
    int nThreshold,
    long nMax,
    string strExcludeBiblioRecPath,
    out DupResultSet dupset,
    out string strError)
{
    strError = "";
    dupset = null;

    if (strSearchStyle == "")
    {
        strSearchStyle = "exact";
    }

    // NOTE(review): strSearchStyle is written into the XML as-is, unlike the
    // target list and the word -- confirm callers only pass fixed style names.
    string strQueryXml = "<target list='"
        + StringUtil.GetXmlStringSimple(strDbName + ":" + strFrom)
        + "'><item><word>"
        + StringUtil.GetXmlStringSimple(strKey)
        + "</word><match>" + strSearchStyle + "</match><relation>=</relation><dataType>string</dataType><maxCount>" + nMax.ToString() + "</maxCount></item><lang>zh</lang></target>";

    Debug.Assert(channel != null, "");

    long lRet = channel.DoSearch(strQueryXml,
        "dup",
        "", // strOuputStyle
        out strError);
    if (lRet == -1)
    {
        return -1;
    }

    if (lRet == 0)
    {
        return 0; // not found
    }

    long lHitCount = lRet;
    long lStart = 0;
    long lPerCount = Math.Min(50, lHitCount);
    List<string> aPath = null;

    dupset = new DupResultSet();
    dupset.Open(false, getTempFileName);

    // Fetch the result set batch by batch and turn every path into an item.
    for (; ; )
    {
        // TODO: allow this loop to be interrupted
        lRet = channel.DoGetSearchResult(
            "dup", // strResultSetName
            lStart,
            lPerCount,
            "zh",
            null, // stop
            out aPath,
            out strError);
        if (lRet == -1)
        {
            return -1;
        }

        if (lRet == 0)
        {
            strError = "未命中";
            break;
        }

        // An empty batch would leave lStart unchanged and make this loop spin
        // forever, so stop as soon as no further paths are delivered.
        if (aPath == null || aPath.Count == 0)
        {
            break;
        }

        foreach (string strPath in aPath)
        {
            // Skip the record that started the search.
            if (strPath == strExcludeBiblioRecPath)
            {
                continue;
            }

            DupLineItem item = new DupLineItem
            {
                Path = strPath,
                Weight = nWeight,
                Threshold = nThreshold,
                Detail = BuildDetail(strDbName, strFrom, strKey, strSearchStyle, nWeight)
            };
            dupset.Add(item);
        }

        lStart += aPath.Count;
        if (lStart >= lHitCount || lPerCount <= 0)
        {
            break;
        }
    }

    return 1;
}
// Returns a window of the duplicate-search result set held by the session.
// parameters:
//      sessioninfo         session that supplies the channel collection and the DupResultSet
//      lStart              index of the first result to return; must not be negative
//      lCount              number of results to return; -1 means everything from lStart
//                          to the end. The value is clipped to what is available.
//      strBrowseInfoStyle  list of the pieces of information to put into each DupSearchResult
//                          "cols"      include the browse columns
//                          "excludecolsoflowthreshold"
//                                      leave out the browse columns of rows whose weight is
//                                      below their threshold; only effective together with "cols"
//                          "detail"    include the Detail text
//      searchresults       [out] the requested results; null on error
// return:
//      result.Value    -1 on error (ErrorInfo holds the message, ErrorCode is SystemError);
//                      otherwise the number of elements in searchresults
public LibraryServerResult GetDupSearchResult(
    SessionInfo sessioninfo,
    long lStart,
    long lCount,
    string strBrowseInfoStyle,
    out DupSearchResult[] searchresults)
{
    string strError = "";
    searchresults = null;
    int nRet = 0;

    LibraryServerResult result = new LibraryServerResult();

    RmsChannel channel = sessioninfo.Channels.GetChannel(this.WsUrl);
    if (channel == null)
    {
        strError = "get channel error";
        goto ERROR1;
    }

    DupResultSet dupset = sessioninfo.DupResultSet;
    if (dupset == null)
    {
        strError = "查重结果集不存在";
        goto ERROR1;
    }

    // A negative start would enlarge the window computed below and make the loop
    // read the result set at a negative index, so reject it up front.
    if (lStart < 0)
    {
        strError = "invalid lStart value " + lStart.ToString() + ": must not be negative";
        goto ERROR1;
    }

    dupset.EnsureCreateIndex(getTempFileName);

    int nCount = (int)lCount;
    int nStart = (int)lStart;

    // Clip the requested count to the number of items available from nStart on.
    int nAvailable = Math.Max(0, (int)dupset.Count - nStart);
    if (nCount == -1 || nCount > nAvailable)
    {
        nCount = nAvailable;
    }

    bool bDetail = StringUtil.IsInList("detail", strBrowseInfoStyle);
    bool bExcludeCols = StringUtil.IsInList("excludecolsoflowthreshold", strBrowseInfoStyle);
    bool bCols = StringUtil.IsInList("cols", strBrowseInfoStyle);

    // Paths whose browse columns have to be fetched, in result order.
    List<string> pathlist = new List<string>();
    List<DupSearchResult> results = new List<DupSearchResult>();

    for (int i = 0; i < nCount; i++)
    {
        DupLineItem item = (DupLineItem)dupset[nStart + i];

        DupSearchResult result_item = new DupSearchResult();
        results.Add(result_item);

        result_item.Path = item.Path;
        result_item.Weight = item.Weight;
        result_item.Threshold = item.Threshold;
        if (bDetail)
        {
            result_item.Detail = item.Detail;
        }

        // Columns are wanted unless the row is below its threshold and such rows
        // were asked to be left out.
        if (bCols && !(bExcludeCols && item.Weight < item.Threshold))
        {
            pathlist.Add(item.Path);
        }
    }

    if (pathlist.Count > 0)
    {
        string[] paths = StringUtil.FromListString(pathlist);

        nRet = channel.GetBrowseRecords(paths,
            "cols",
            out ArrayList aRecord,
            out strError);
        if (nRet == -1)
        {
            strError = "GetBrowseRecords() error: " + strError;
            goto ERROR1;
        }

        // pathlist is a subsequence of results in the same order, so one forward
        // pass pairs each fetched record with the result it belongs to.
        int j = 0;
        for (int i = 0; i < results.Count; i++)
        {
            DupSearchResult result_item = results[i];
            if (result_item.Path != pathlist[j])
            {
                continue;
            }

            // The requested style does not contain id.
            result_item.Cols = (string[])aRecord[j];

            j++;
            if (j >= pathlist.Count)
            {
                break;
            }
        }
    }

    searchresults = results.ToArray();

    result.Value = searchresults.Length;
    return result;
ERROR1:
    result.Value = -1;
    result.ErrorCode = ErrorCode.SystemError;
    result.ErrorInfo = strError;
    return result;
}
// Runs one search against a single "from" of one database and collects the hits
// into a new DupResultSet. The items created here carry Path, Weight and
// Threshold only; Detail is not filled in.
// parameters:
//      channel                 channel used to run the search; must not be null
//      strDbName               database name; combined with strFrom as "db:from" in the query target
//      strFrom                 the "from" to search
//      strKey                  the search word
//      strSearchStyle          match style written into the query; "" is treated as "exact"
//      nWeight                 weight stored on every item created from a hit
//      nThreshold              threshold stored on every item created from a hit
//      nMax                    value written into the query's maxCount element
//      strExcludeBiblioRecPath record path to leave out of the result
//      dupset                  [out] the collected items; null when nothing was found
//                              or the initial search failed
//      strError                [out] error message
// return:
//      -1  error
//      0   not found
//      1   found
int SearchOneFrom(
    RmsChannel channel,
    string strDbName,
    string strFrom,
    string strKey,
    string strSearchStyle,
    int nWeight,
    int nThreshold,
    long nMax,
    string strExcludeBiblioRecPath,
    out DupResultSet dupset,
    out string strError)
{
    strError = "";
    dupset = null;

    if (strSearchStyle == "")
    {
        strSearchStyle = "exact";
    }

    // NOTE(review): strSearchStyle is written into the XML as-is, unlike the
    // target list and the word -- confirm callers only pass fixed style names.
    string strQueryXml = "<target list='"
        + StringUtil.GetXmlStringSimple(strDbName + ":" + strFrom)
        + "'><item><word>"
        + StringUtil.GetXmlStringSimple(strKey)
        + "</word><match>" + strSearchStyle + "</match><relation>=</relation><dataType>string</dataType><maxCount>" + nMax.ToString() + "</maxCount></item><lang>zh</lang></target>";

    Debug.Assert(channel != null, "");

    long lRet = channel.DoSearch(strQueryXml,
        "dup",
        "", // strOuputStyle
        out strError);
    if (lRet == -1)
    {
        return -1;
    }

    if (lRet == 0)
    {
        return 0; // not found
    }

    long lHitCount = lRet;
    long lStart = 0;
    long lPerCount = Math.Min(50, lHitCount);
    List<string> aPath = null;

    dupset = new DupResultSet();
    dupset.Open(false, getTempFileName);

    // Fetch the result set batch by batch and turn every path into an item.
    for (; ; )
    {
        // TODO: allow this loop to be interrupted
        lRet = channel.DoGetSearchResult(
            "dup", // strResultSetName
            lStart,
            lPerCount,
            "zh",
            null, // stop
            out aPath,
            out strError);
        if (lRet == -1)
        {
            return -1;
        }

        if (lRet == 0)
        {
            strError = "未命中";
            break;
        }

        // An empty batch would leave lStart unchanged and make this loop spin
        // forever, so stop as soon as no further paths are delivered.
        if (aPath == null || aPath.Count == 0)
        {
            break;
        }

        foreach (string strPath in aPath)
        {
            // Skip the record that started the search.
            if (strPath == strExcludeBiblioRecPath)
            {
                continue;
            }

            DupLineItem item = new DupLineItem();
            item.Path = strPath;
            item.Weight = nWeight;
            item.Threshold = nThreshold;
            dupset.Add(item);
        }

        lStart += aPath.Count;
        if (lStart >= lHitCount || lPerCount <= 0)
        {
            break;
        }
    }

    return 1;
}
// Folds the Detail text of right into left: afterwards left.Detail holds both
// texts joined by " OR ". right is left untouched.
static void MergeDetail(DupLineItem left, DupLineItem right)
{
    string combined = string.Concat(left.Detail, " OR ", right.Detail);
    left.Detail = combined;
}