Пример #1
0
        // Returns one page of hits from the duplicate-check result set stored on the session.
        // parameters:
        //      sessioninfo         session whose DupResultSet holds the hits and whose Channels
        //                          collection supplies the channel used to fetch browse columns
        //      lStart              index of the first hit to return; must not be negative
        //      lCount              number of hits to return; -1 means "everything from lStart on".
        //                          The value is clamped to the number of hits actually available
        //      strBrowseInfoStyle  selects what each returned DupSearchResult contains
        //                          (tested with StringUtil.IsInList):
        //              "cols"      fill in the browse columns
        //              "excludecolsoflowthreshold" do not fetch browse columns for rows whose
        //                          weight is below their threshold; only effective together with "cols"
        //              "detail"    copy the Detail value of each hit
        //      searchresults       the requested hits; null when an error is returned
        // return:
        //      result.Value == -1  error, result.ErrorInfo carries the message
        //      otherwise           number of elements in searchresults
        public LibraryServerResult GetDupSearchResult(
            SessionInfo sessioninfo,
            long lStart,
            long lCount,
            string strBrowseInfoStyle,
            out DupSearchResult[] searchresults)
        {
            string strError = "";

            searchresults = null;
            int nRet = 0;

            LibraryServerResult result = new LibraryServerResult();

            RmsChannel channel = sessioninfo.Channels.GetChannel(this.WsUrl);

            if (channel == null)
            {
                strError = "get channel error";
                goto ERROR1;
            }

            DupResultSet dupset = sessioninfo.DupResultSet;

            if (dupset == null)
            {
                strError = "查重结果集不存在";
                goto ERROR1;
            }

            // A negative start would otherwise surface as an index exception from dupset[...]
            if (lStart < 0)
            {
                strError = "lStart must not be negative (value: " + lStart.ToString() + ")";
                goto ERROR1;
            }

            dupset.EnsureCreateIndex(getTempFileName);

            // Saturate instead of letting the long -> int casts wrap around
            int nStart = lStart > int.MaxValue ? int.MaxValue : (int)lStart;
            int nCount = lCount > int.MaxValue ? int.MaxValue : (int)lCount;

            // Number of hits that exist at or after nStart (never negative)
            int nAvailable = (int)dupset.Count - nStart;
            if (nAvailable < 0)
            {
                nAvailable = 0;
            }

            // -1 asks for everything; anything larger than what exists is clamped
            if (nCount == -1 || nCount > nAvailable)
            {
                nCount = nAvailable;
            }

            bool bDetail = (StringUtil.IsInList("detail", strBrowseInfoStyle));

            bool bExcludeCols = (StringUtil.IsInList("excludecolsoflowthreshold", strBrowseInfoStyle) == true);

            bool bCols = (StringUtil.IsInList("cols", strBrowseInfoStyle) == true);

            // Paths whose browse columns have to be fetched, in result order
            List <string> pathlist = new List <string>();

            List <DupSearchResult> results = new List <DupSearchResult>();

            for (int i = 0; i < nCount; i++)
            {
                DupLineItem item = (DupLineItem)dupset[nStart + i];

                DupSearchResult result_item = new DupSearchResult();
                results.Add(result_item);

                result_item.Path      = item.Path;
                result_item.Weight    = item.Weight;
                result_item.Threshold = item.Threshold;
                if (bDetail)
                {
                    result_item.Detail = item.Detail;
                }

                if (bCols == true)
                {
                    // Rows below threshold keep Cols unset when the exclude flag is given
                    bool bSkip = (bExcludeCols == true && item.Weight < item.Threshold);
                    if (bSkip == false)
                    {
                        pathlist.Add(item.Path);
                    }
                }
            }

            if (pathlist.Count > 0)
            {
                string[] paths = StringUtil.FromListString(pathlist);

                nRet = channel.GetBrowseRecords(paths,
                                                "cols",
                                                out ArrayList aRecord,
                                                out strError);
                if (nRet == -1)
                {
                    strError = "GetBrowseRecords() error: " + strError;
                    goto ERROR1;
                }

                // The loop below indexes aRecord by path position, so a short answer
                // must be reported here rather than crash on aRecord[j]
                if (aRecord == null || aRecord.Count < pathlist.Count)
                {
                    strError = "GetBrowseRecords() error: fewer records returned than the "
                               + pathlist.Count.ToString() + " paths requested";
                    goto ERROR1;
                }

                // pathlist is an ordered subset of results, so one forward pass pairs them up
                int j = 0;
                for (int i = 0; i < results.Count; i++)
                {
                    DupSearchResult result_item = results[i];
                    if (result_item.Path != pathlist[j])
                    {
                        continue;
                    }

                    string[] cols = (string[])aRecord[j];

                    results[i].Cols = cols;   // the requested style does not include the id
                    j++;
                    if (j >= pathlist.Count)
                    {
                        break;
                    }
                }
            }

            searchresults = new DupSearchResult[results.Count];
            results.CopyTo(searchresults);

            result.Value = searchresults.Length;
            return(result);

ERROR1:
            result.Value     = -1;
            result.ErrorCode = ErrorCode.SystemError;
            result.ErrorInfo = strError;
            return(result);
        }
Пример #2
0
        // Runs a search against a single "from" (access point) of one database and
        // collects the hit paths into a new DupResultSet.
        // parameters:
        //      channel         channel used for the search; must not be null
        //      strDbName       database to search
        //      strFrom         access point name inside that database
        //      strKey          key to search for
        //      strSearchStyle  match style placed in the <match> element; null or empty means "exact"
        //      nWeight         weight stored on every hit
        //      nThreshold      threshold stored on every hit
        //      nMax            value placed in the <maxCount> element of the query
        //      strExcludeBiblioRecPath record path to leave out of the result (may be null)
        //      dupset          the hits; stays null when nothing was found or the search itself
        //                      failed. It may be non-null on a -1 return when fetching a page
        //                      failed, so the caller has to dispose it in that case too
        //      strError        error message
        // return:
        //      -1  error
        //      0   not found
        //      1   found
        int SearchOneFrom(
            RmsChannel channel,
            string strDbName,
            string strFrom,
            string strKey,
            string strSearchStyle,
            int nWeight,
            int nThreshold,
            long nMax,
            string strExcludeBiblioRecPath,
            out DupResultSet dupset,
            out string strError)
        {
            strError = "";
            dupset   = null;
            long lRet = 0;

            if (String.IsNullOrEmpty(strSearchStyle) == true)
            {
                strSearchStyle = "exact";
            }

            string strQueryXml = "<target list='"
                                 + StringUtil.GetXmlStringSimple(strDbName + ":" + strFrom)
                                 + "'><item><word>"
                                 + StringUtil.GetXmlStringSimple(strKey)
                                 + "</word><match>" + strSearchStyle + "</match><relation>=</relation><dataType>string</dataType><maxCount>" + nMax.ToString() + "</maxCount></item><lang>zh</lang></target>";

            Debug.Assert(channel != null, "");

            lRet = channel.DoSearch(strQueryXml,
                                    "dup",
                                    "", // strOuputStyle
                                    out strError);
            if (lRet == -1)
            {
                goto ERROR1;
            }

            if (lRet == 0)
            {
                return(0);   // not found
            }
            long lHitCount = lRet;

            long          lStart    = 0;
            long          lPerCount = Math.Min(50, lHitCount);   // page size for fetching hits
            List <string> aPath     = null;

            dupset = new DupResultSet();
            dupset.Open(false, getTempFileName);

            // Fetch the result set page by page and process every record
            for (; ;)
            {
                // TODO: allow the operation to be interrupted

                lRet = channel.DoGetSearchResult(
                    "dup",   // strResultSetName
                    lStart,
                    lPerCount,
                    "zh",
                    null,   // stop
                    out aPath,
                    out strError);
                if (lRet == -1)
                {
                    goto ERROR1;
                }

                if (lRet == 0)
                {
                    strError = "未命中";
                    break;
                }

                // An empty page cannot advance lStart; leave instead of looping forever
                if (aPath == null || aPath.Count == 0)
                {
                    break;
                }

                for (int i = 0; i < aPath.Count; i++)
                {
                    string strPath = aPath[i];

                    // Ignore the path of the record that started the search
                    if (strPath == strExcludeBiblioRecPath)
                    {
                        continue;
                    }

                    DupLineItem item = new DupLineItem
                    {
                        Path      = strPath,
                        Weight    = nWeight,
                        Threshold = nThreshold,
                        Detail    = BuildDetail(strDbName,
                                                strFrom,
                                                strKey,
                                                strSearchStyle,
                                                nWeight)
                    };
                    dupset.Add(item);
                }

                lStart += aPath.Count;
                if (lStart >= lHitCount || lPerCount <= 0)
                {
                    break;
                }
            }

            return(1);

ERROR1:
            return(-1);
        }
Пример #3
0
        // Performs a duplicate check for one bibliographic record and stores the merged
        // hits in sessioninfo1.DupResultSet.
        // parameters:
        //      sessioninfo1            only used to hold the resulting DupResultSet; searching
        //                              is done through channel, not through the session
        //      channel                 channel used for all search operations
        //      strOriginBiblioRecPath  path of the bibliographic record that starts the check
        //      strOriginBiblioRecXml   XML of that record
        //      strProjectName          name of the dup-check project; when empty, the default
        //                              project configured for the record's database is used
        //      strStyle                "includeoriginrecord" keeps the originating record in the
        //                              result (it is left out by default); null is treated as empty
        //      strUsedProjectName      name of the project that was actually used
        // return:
        //      result.Value == -1  error, result.ErrorInfo carries the message
        //      result.Value == 0   not found
        //      otherwise           number of hits
        public LibraryServerResult SearchDup(
            SessionInfo sessioninfo1,
            RmsChannel channel,
            string strOriginBiblioRecPath,
            string strOriginBiblioRecXml,
            string strProjectName,
            string strStyle,
            out string strUsedProjectName)
        {
            string strError = "";
            int    nRet     = 0;

            strUsedProjectName = "";

            string strDebugInfo = "";

            // Result set across all databases. Declared before the first goto so that the
            // ERROR1 exit can release it.
            DupResultSet alldatabase_set = null;

            // Style tokens are plain identifiers: tolerate null and fold case culture-independently
            strStyle = (strStyle ?? "").ToLowerInvariant();
            bool bIncludeOriginRecord = StringUtil.IsInList("includeoriginrecord", strStyle);

            LibraryServerResult result = new LibraryServerResult();

            // Without a project name, look up the default project of the record's database
            // in the <default> elements
            if (String.IsNullOrEmpty(strProjectName) == true)
            {
                if (String.IsNullOrEmpty(strOriginBiblioRecPath) == true)
                {
                    strError = "既没有给出查重方案名,也没有给出记录路径,无法进行查重";
                    goto ERROR1;
                }
                string strOriginBiblioDbName = ResPath.GetDbName(strOriginBiblioRecPath);

                XmlNode nodeDefault = this.LibraryCfgDom.DocumentElement.SelectSingleNode("//dup/default[@origin='" + strOriginBiblioDbName + "']");
                if (nodeDefault == null)
                {
                    strError = "在没有明确指定查重方案名的情况下,本希望通过相关书目库的缺省查重方案名进行查重。但目前系统没有为书目库 '" + strOriginBiblioDbName + "' 定义缺省查重方案名,无法进行查重";
                    goto ERROR1;
                }

                string strDefaultProjectName = DomUtil.GetAttr(nodeDefault, "project");
                if (String.IsNullOrEmpty(strDefaultProjectName) == true)
                {
                    strError = "书目库 '" + strOriginBiblioDbName + "' 的<default>元素中未定义project属性值";
                    goto ERROR1;
                }

                strProjectName = strDefaultProjectName;
            }

            strUsedProjectName = strProjectName;

            // Locate the definition node of the dup-check project
            // return:
            //      -1  error
            //      0   not found
            //      1   found
            nRet = GetDupProjectNode(strProjectName,
                                     out XmlNode nodeProject,
                                     out strError);
            if (nRet == 0 || nRet == -1)
            {
                goto ERROR1;
            }

            Debug.Assert(nodeProject != null, "");

            XmlNodeList nodeDatabases = nodeProject.SelectNodes("database");

            // Search every database listed in the project
            for (int i = 0; i < nodeDatabases.Count; i++)
            {
                XmlNode nodeDatabase    = nodeDatabases[i];
                string  strDatabaseName = DomUtil.GetAttr(nodeDatabase, "name");
                string  strThreshold    = DomUtil.GetAttr(nodeDatabase, "threshold");

                // A missing or malformed threshold counts as 0
                int.TryParse(strThreshold, out int nThreshold);

                List <AccessKeyInfo> aKeyLine = null;
                // Simulate key creation for the origin record to obtain its list of keys
                // return:
                //      -1  error
                //      0   succeed
                nRet = GetKeys(
                    channel,
                    strOriginBiblioRecPath,
                    strOriginBiblioRecXml,
                    out aKeyLine,
                    out strError);
                if (nRet == -1)
                {
                    goto ERROR1;
                }

                DupResultSet onedatabase_set = null;    // result set of this one database
                try
                {
                    XmlNodeList accesspoints = nodeDatabase.SelectNodes("accessPoint");
                    // Loop over the <accessPoint> elements
                    for (int j = 0; j < accesspoints.Count; j++)
                    {
                        XmlNode accesspoint = accesspoints[j];

                        string strFrom = DomUtil.GetAttr(accesspoint, "name");

                        // Keys of the origin record that belong to this from
                        List <string> keys = GetKeyByFrom(aKeyLine,
                                                          strFrom);
                        if (keys.Count == 0)
                        {
                            continue;
                        }

                        string strWeight      = DomUtil.GetAttr(accesspoint, "weight");
                        string strSearchStyle = DomUtil.GetAttr(accesspoint, "searchStyle");

                        // A missing or malformed weight counts as 0
                        int.TryParse(strWeight, out int nWeight);

                        for (int k = 0; k < keys.Count; k++)
                        {
                            string strKey = (string)keys[k];
                            if (strKey == "")
                            {
                                continue;
                            }

                            DupResultSet dupset = null;
                            try
                            {
                                // Search one from
                                // return:
                                //      -1  error
                                //      0   not found
                                //      1   found
                                nRet = SearchOneFrom(
                                    channel,
                                    strDatabaseName,
                                    strFrom,
                                    strKey,
                                    strSearchStyle,
                                    nWeight,
                                    nThreshold,
                                    5000,   // maximum number of hits requested per search
                                    (bIncludeOriginRecord == false) ? strOriginBiblioRecPath : null,
                                    out dupset,
                                    out strError);

                                if (nRet == -1)
                                {
                                    // A failed search for one key is skipped; the other keys still run
                                    continue;
                                }

                                if (onedatabase_set == null)
                                {
                                    onedatabase_set = dupset;
                                    dupset          = null; // ownership moved to onedatabase_set; keep finally from disposing it
                                    continue;
                                }

                                if (nRet == 0)
                                {
                                    continue;
                                }

                                Debug.Assert(dupset != null, "");

                                if (onedatabase_set.Sorted == true)
                                {
                                    onedatabase_set.EnsureCreateIndex(getTempFileName);
                                }
                                else
                                {
                                    onedatabase_set.Sort(getTempFileName);
                                }
                                dupset.Sort(getTempFileName);   // Sort() also makes sure the index exists

                                // Merge dupset into the set accumulated so far ("OR", summing weights)
                                DupResultSet tempset = new DupResultSet();
                                try
                                {
                                    tempset.Open(false, getTempFileName);

                                    // return
                                    //		-1	error
                                    //		0	success
                                    nRet = DupResultSet.Merge("OR",
                                                              onedatabase_set,
                                                              dupset,
                                                              null, // targetLeft,
                                                              tempset,
                                                              null, // targetRight,
                                                              false,
                                                              out strDebugInfo,
                                                              out strError);
                                    if (nRet == -1)
                                    {
                                        goto ERROR1;
                                    }

                                    onedatabase_set.Dispose();
                                    onedatabase_set        = tempset;
                                    tempset                = null;  // ownership moved to onedatabase_set
                                    onedatabase_set.Sorted = true;  // a merge result is ordered by construction
                                }
                                finally
                                {
                                    // Only still set when the merge failed
                                    if (tempset != null)
                                    {
                                        tempset.Dispose();
                                    }
                                }
                            }
                            finally
                            {
                                if (dupset != null)
                                {
                                    dupset.Dispose();
                                }
                            }
                        } // end of k loop
                    }     // end of j loop


                    if (alldatabase_set == null)
                    {
                        alldatabase_set = onedatabase_set;
                        onedatabase_set = null; // ownership moved to alldatabase_set; keep finally from disposing it
                        continue;
                    }

                    // Merge this database's hits into the overall set
                    if (onedatabase_set != null)
                    {
                        DupResultSet tempset0 = new DupResultSet();
                        try
                        {
                            tempset0.Open(false, getTempFileName);

                            if (alldatabase_set.Sorted == true)
                            {
                                alldatabase_set.EnsureCreateIndex(getTempFileName);
                            }
                            else
                            {
                                alldatabase_set.Sort(getTempFileName);
                            }
                            onedatabase_set.Sort(getTempFileName);   // Sort() also makes sure the index exists

                            nRet = DupResultSet.Merge("OR",
                                                      alldatabase_set,
                                                      onedatabase_set,
                                                      null, // targetLeft,
                                                      tempset0,
                                                      null, // targetRight,
                                                      false,
                                                      out strDebugInfo,
                                                      out strError);
                            if (nRet == -1)
                            {
                                goto ERROR1;
                            }

                            alldatabase_set.Dispose();
                            alldatabase_set        = tempset0;
                            tempset0               = null;  // ownership moved to alldatabase_set
                            alldatabase_set.Sorted = true;
                        }
                        finally
                        {
                            // Only still set when the merge failed
                            if (tempset0 != null)
                            {
                                tempset0.Dispose();
                            }
                        }
                    }
                }
                finally
                {
                    if (onedatabase_set != null)
                    {
                        onedatabase_set.Dispose();
                    }
                }
            }

            // Finally order the hits by how far Weight exceeds Threshold, ready for output
            if (alldatabase_set != null)
            {
                alldatabase_set.SortStyle = DupResultSetSortStyle.OverThreshold;
                alldatabase_set.Sort(getTempFileName);
            }

            // Replace the result set kept on the session, releasing the previous one
            if (sessioninfo1.DupResultSet != null)
            {
                sessioninfo1.DupResultSet.Dispose();
            }
            sessioninfo1.DupResultSet = alldatabase_set;

            if (alldatabase_set != null)
            {
                result.Value = alldatabase_set.Count;
            }
            else
            {
                result.Value = 0;
            }
            return(result);

ERROR1:
            // The partial result was never handed to the session, so release it here
            if (alldatabase_set != null)
            {
                alldatabase_set.Dispose();
            }
            result.Value     = -1;
            result.ErrorCode = ErrorCode.SystemError;
            result.ErrorInfo = strError;
            return(result);
        }
Пример #4
0
        // Merges two result sets in a single ordered pass, comparing items with
        // DupLineItem.CompareTo. Both sources must already have their index built.
        // parameters:
        //		strStyle	operation style: OR , AND , SUB (compared ignoring case)
        //		sourceLeft	left source result set
        //		sourceRight	right source result set
        //		targetLeft	receives items found only on the left; may be null; must be null for "OR"
        //		targetMiddle	receives items found on both sides (weights summed); for "OR" it
        //				receives every item; may be null
        //		targetRight	receives items found only on the right; may be null; must be null for "OR"
        //		bOutputDebugInfo	whether to write processing information
        //		strDebugInfo	processing information (HTML fragments separated by <br/>)
        //		strError	always returned empty by this implementation
        // return
        //		0	success (failures are reported by throwing Exception)
        public static int Merge(string strStyle,
                                DupResultSet sourceLeft,
                                DupResultSet sourceRight,
                                DupResultSet targetLeft,
                                DupResultSet targetMiddle,
                                DupResultSet targetRight,
                                bool bOutputDebugInfo,
                                out string strDebugInfo,
                                out string strError)
        {
            strDebugInfo = "";
            strError     = "";

            if (sourceLeft.m_streamSmall == null)
            {
                throw new Exception("sourceLeft结果集对象未建索引");
            }

            if (sourceRight.m_streamSmall == null)
            {
                throw new Exception("sourceRight结果集对象未建索引");
            }


            if (bOutputDebugInfo == true)
            {
                strDebugInfo += "strStyle值:" + strStyle + "<br/>";
                strDebugInfo += "sourceLeft结果集:" + sourceLeft.Dump() + "<br/>";
                strDebugInfo += "sourceRight结果集:" + sourceRight.Dump() + "<br/>";
            }

            // The style does not change during the merge, so decide once
            bool bOrStyle = (String.Compare(strStyle, "OR", true) == 0);

            if (bOrStyle == true)
            {
                if (targetLeft != null || targetRight != null)
                {
                    Exception ex = new Exception("DpResultSetManager::Merge()中是不是参数用错了?当strStyle参数值为\"OR\"时,targetLeft参数和targetRight无效,值应为null");
                    throw (ex);
                }
            }

            DupLineItem dpRecordLeft;
            DupLineItem dpRecordRight;
            int         i = 0;      // cursor into sourceLeft; -1 once the left side is exhausted
            int         j = 0;      // cursor into sourceRight; -1 once the right side is exhausted
            int         ret;

            while (true)
            {
                dpRecordLeft  = null;
                dpRecordRight = null;
                if (i >= sourceLeft.Count)
                {
                    if (bOutputDebugInfo == true)
                    {
                        strDebugInfo += "i大于等于sourceLeft的个数,将i改为-1<br/>";
                    }
                    i = -1;
                }
                else if (i != -1)
                {
                    try
                    {
                        dpRecordLeft = (DupLineItem)sourceLeft[i];
                        if (bOutputDebugInfo == true)
                        {
                            strDebugInfo += "取出sourceLeft集合中第" + Convert.ToString(i) + "个元素,Path为" + dpRecordLeft.Path + "<br/>";
                        }
                    }
                    catch (Exception e)
                    {
                        // Keep the original exception as InnerException for diagnosis
                        Exception ex = new Exception("取SourceLeft集合出错:i=" + Convert.ToString(i) + "----Count=" + Convert.ToString(sourceLeft.Count) + ", internel error :" + e.Message + "<br/>", e);
                        throw (ex);
                    }
                }
                if (j >= sourceRight.Count)
                {
                    if (bOutputDebugInfo == true)
                    {
                        strDebugInfo += "j大于等于sourceRight的个数,将j改为-1<br/>";
                    }
                    j = -1;
                }
                else if (j != -1)
                {
                    try
                    {
                        dpRecordRight = (DupLineItem)sourceRight[j];
                        if (bOutputDebugInfo == true)
                        {
                            strDebugInfo += "取出sourceRight集合中第" + Convert.ToString(j) + "个元素,Path为" + dpRecordRight.Path + "<br/>";
                        }
                    }
                    catch (Exception e)
                    {
                        // Report the right-hand set's own count and keep the original exception
                        Exception ex = new Exception("取SourceRight集合出错:j=" + Convert.ToString(j) + "----Count=" + Convert.ToString(sourceRight.Count) + ", internal error :" + e.Message + "<br/>", e);
                        throw (ex);
                    }
                }
                if (i == -1 && j == -1)
                {
                    if (bOutputDebugInfo == true)
                    {
                        strDebugInfo += "i,j都等于-1跳出<br/>";
                    }
                    break;
                }

                // An exhausted side always loses the comparison, so the other side is drained
                if (dpRecordLeft == null)
                {
                    if (bOutputDebugInfo == true)
                    {
                        strDebugInfo += "dpRecordLeft为null,设ret等于1<br/>";
                    }
                    ret = 1;
                }
                else if (dpRecordRight == null)
                {
                    if (bOutputDebugInfo == true)
                    {
                        strDebugInfo += "dpRecordRight为null,设ret等于-1<br/>";
                    }
                    ret = -1;
                }
                else
                {
                    ret = dpRecordLeft.CompareTo(dpRecordRight);
                    if (bOutputDebugInfo == true)
                    {
                        strDebugInfo += "dpRecordLeft与dpRecordRight均不为null,比较两条记录得到ret等于" + Convert.ToString(ret) + "<br/>";
                    }
                }

                // OR: every item goes to targetMiddle; equal items are combined into one
                if (bOrStyle == true && targetMiddle != null)
                {
                    if (ret == 0)
                    {
                        // Either side may be kept, but the weights have to be added up
                        dpRecordLeft.Weight += dpRecordRight.Weight;

                        targetMiddle.Add(dpRecordLeft);
                        i++;
                        j++;
                    }
                    else if (ret < 0)
                    {
                        targetMiddle.Add(dpRecordLeft);
                        i++;
                    }
                    else
                    {
                        targetMiddle.Add(dpRecordRight);
                        j++;
                    }
                    continue;
                }

                if (ret == 0)
                {
                    // ret == 0 only comes from CompareTo, so both records are non-null here
                    if (targetMiddle != null)
                    {
                        if (bOutputDebugInfo == true)
                        {
                            strDebugInfo += "ret等于0,加到targetMiddle里面<br/>";
                        }

                        // Either side may be kept, but the weights have to be added up
                        dpRecordLeft.Weight += dpRecordRight.Weight;

                        targetMiddle.Add(dpRecordLeft);
                    }

                    // Advance even without a targetMiddle; otherwise the loop would never end
                    i++;
                    j++;
                }
                else if (ret < 0)
                {
                    if (bOutputDebugInfo == true)
                    {
                        strDebugInfo += "ret小于0,加到targetLeft里面<br/>";
                    }

                    if (targetLeft != null && dpRecordLeft != null)
                    {
                        targetLeft.Add(dpRecordLeft);
                    }
                    i++;
                }
                else
                {
                    if (bOutputDebugInfo == true)
                    {
                        strDebugInfo += "ret大于0,加到targetRight里面<br/>";
                    }

                    if (targetRight != null && dpRecordRight != null)
                    {
                        targetRight.Add(dpRecordRight);
                    }

                    j++;
                }
            }
            return(0);
        }
Пример #5
0
        // Merges two indexed result sets by walking both in step and comparing
        // the current items with DupLineItem.CompareTo().
        //
        // Routing of each compared pair:
        //   * style "OR" (case-insensitive) with a non-null targetMiddle:
        //     every item goes to targetMiddle; when both sides hold an equal
        //     item, only the left one is kept and the right item's Weight is
        //     added to it.
        //   * otherwise: items found only on the left go to targetLeft, items
        //     found only on the right go to targetRight, and equal items go to
        //     targetMiddle (left item kept, weights summed). A null target
        //     simply discards what would have been written to it.
        //
        // NOTE(review): the step-wise walk only yields a correct merge if both
        // sources are ordered consistently with CompareTo(); that ordering is
        // presumably established by EnsureCreateIndex() -- confirm with callers.
        //
        // parameters:
        //		strStyle	operation style: OR, AND, SUB. Only "OR" is inspected here.
        //		sourceLeft	left source result set (must already be indexed)
        //		sourceRight	right source result set (must already be indexed)
        //		targetLeft	receives left-only items; must be null for "OR"
        //		targetMiddle	receives items present on both sides (all items for "OR")
        //		targetRight	receives right-only items; must be null for "OR"
        //		bOutputDebugInfo	whether to collect a trace in strDebugInfo
        //		strDebugInfo	HTML-ish trace of the merge steps ("" when disabled)
        //		strError	always "" on return; failures are reported by throwing
        // return
        //		0	success (this method never returns -1; it throws instead)
        public static int Merge(string strStyle,
            DupResultSet sourceLeft,
            DupResultSet sourceRight,
            DupResultSet targetLeft,
            DupResultSet targetMiddle,
            DupResultSet targetRight,
            bool bOutputDebugInfo,
            out string strDebugInfo,
            out string strError)
        {
            strDebugInfo = "";
            strError = "";

            if (sourceLeft == null)
            {
                throw new ArgumentNullException("sourceLeft");
            }

            if (sourceRight == null)
            {
                throw new ArgumentNullException("sourceRight");
            }

            if (sourceLeft.m_streamSmall == null)
            {
                throw new Exception("sourceLeft结果集对象未建索引");
            }

            if (sourceRight.m_streamSmall == null)
            {
                throw new Exception("sourceRight结果集对象未建索引");
            }

            if (bOutputDebugInfo == true)
            {
                strDebugInfo += "strStyle值:" + strStyle + "<br/>";
                strDebugInfo += "sourceLeft结果集:" + sourceLeft.Dump() + "<br/>";
                strDebugInfo += "sourceRight结果集:" + sourceRight.Dump() + "<br/>";
            }

            // The style never changes during the merge, so test it once.
            bool bStyleIsOr = (String.Compare(strStyle, "OR", true) == 0);

            if (bStyleIsOr == true)
            {
                if (targetLeft != null || targetRight != null)
                {
                    Exception ex = new Exception("DpResultSetManager::Merge()中是不是参数用错了?当strStyle参数值为\"OR\"时,targetLeft参数和targetRight无效,值应为null");
                    throw (ex);
                }
            }

            DupLineItem dpRecordLeft;
            DupLineItem dpRecordRight;
            int i = 0;  // cursor into sourceLeft; -1 means "exhausted"
            int j = 0;  // cursor into sourceRight; -1 means "exhausted"
            int ret;
            while (true)
            {
                dpRecordLeft = null;
                dpRecordRight = null;

                // Fetch the current left item, or mark the left side exhausted.
                if (i >= sourceLeft.Count)
                {
                    if (bOutputDebugInfo == true)
                    {
                        strDebugInfo += "i大于等于sourceLeft的个数,将i改为-1<br/>";
                    }
                    i = -1;
                }
                else if (i != -1)
                {
                    try
                    {
                        dpRecordLeft = (DupLineItem)sourceLeft[i];
                        if (bOutputDebugInfo == true)
                        {
                            strDebugInfo += "取出sourceLeft集合中第" + Convert.ToString(i) + "个元素,Path为" + dpRecordLeft.Path + "<br/>";
                        }
                    }
                    catch (Exception e)
                    {
                        // Keep the original failure as InnerException for diagnosis.
                        throw new Exception("取SourceLeft集合出错:i=" + Convert.ToString(i) + "----Count=" + Convert.ToString(sourceLeft.Count) + ", internel error :" + e.Message + "<br/>", e);
                    }
                }

                // Fetch the current right item, or mark the right side exhausted.
                if (j >= sourceRight.Count)
                {
                    if (bOutputDebugInfo == true)
                    {
                        strDebugInfo += "j大于等于sourceRight的个数,将j改为-1<br/>";
                    }
                    j = -1;
                }
                else if (j != -1)
                {
                    try
                    {
                        dpRecordRight = (DupLineItem)sourceRight[j];
                        if (bOutputDebugInfo == true)
                        {
                            strDebugInfo += "取出sourceRight集合中第" + Convert.ToString(j) + "个元素,Path为" + dpRecordRight.Path + "<br/>";
                        }
                    }
                    catch (Exception e)
                    {
                        // Report the RIGHT set's count (the message used to print
                        // sourceLeft.Count) and keep the original failure attached.
                        throw new Exception("取SourceRight集合出错:j=" + Convert.ToString(j) + "----Count=" + Convert.ToString(sourceRight.Count) + ", internel error :" + e.Message + "<br/>", e);
                    }
                }

                if (i == -1 && j == -1)
                {
                    if (bOutputDebugInfo == true)
                    {
                        strDebugInfo += "i,j都等于-1跳出<br/>";
                    }
                    break;
                }

                // Decide which side(s) to consume. An exhausted side always
                // loses, so the remaining side is drained item by item.
                if (dpRecordLeft == null)
                {
                    if (bOutputDebugInfo == true)
                    {
                        strDebugInfo += "dpRecordLeft为null,设ret等于1<br/>";
                    }
                    ret = 1;
                }
                else if (dpRecordRight == null)
                {
                    if (bOutputDebugInfo == true)
                    {
                        strDebugInfo += "dpRecordRight为null,设ret等于-1<br/>";
                    }
                    ret = -1;
                }
                else
                {
                    ret = dpRecordLeft.CompareTo(dpRecordRight);
                    if (bOutputDebugInfo == true)
                    {
                        strDebugInfo += "dpRecordLeft与dpRecordRight均不为null,比较两条记录得到ret等于" + Convert.ToString(ret) + "<br/>";
                    }
                }

                // OR: everything lands in targetMiddle.
                if (bStyleIsOr == true && targetMiddle != null)
                {
                    if (ret == 0)
                    {
                        // Either side would do, but the weights must be summed.
                        dpRecordLeft.Weight += dpRecordRight.Weight;

                        targetMiddle.Add(dpRecordLeft);
                        i++;
                        j++;
                    }
                    else if (ret < 0)
                    {
                        targetMiddle.Add(dpRecordLeft);
                        i++;
                    }
                    else
                    {
                        targetMiddle.Add(dpRecordRight);
                        j++;
                    }
                    continue;
                }

                if (ret == 0)
                {
                    if (targetMiddle != null)
                    {
                        if (bOutputDebugInfo == true)
                        {
                            strDebugInfo += "ret等于0,加到targetMiddle里面<br/>";
                        }

                        // Either side would do, but the weights must be summed.
                        dpRecordLeft.Weight += dpRecordRight.Weight;

                        targetMiddle.Add(dpRecordLeft);
                    }

                    // Advance both cursors even when there is no targetMiddle;
                    // otherwise an equal pair would be compared forever.
                    i++;
                    j++;
                }
                else if (ret < 0)
                {
                    if (bOutputDebugInfo == true)
                    {
                        strDebugInfo += "ret小于0,加到targetLeft里面<br/>";
                    }

                    if (targetLeft != null && dpRecordLeft != null)
                    {
                        targetLeft.Add(dpRecordLeft);
                    }
                    i++;
                }
                else
                {
                    if (bOutputDebugInfo == true)
                    {
                        strDebugInfo += "ret大于0,加到targetRight里面<br/>";
                    }

                    if (targetRight != null && dpRecordRight != null)
                    {
                        targetRight.Add(dpRecordRight);
                    }

                    j++;
                }
            }
            return 0;
        }
Пример #6
0
        // Searches one database through a single access point ("from") for one
        // key and collects the hit paths into a new DupResultSet.
        //
        // The search is issued into the server-side result set named "dup" and
        // then read back in pages of at most 50 paths. Every path except
        // strExcludeBiblioRecPath becomes a DupLineItem carrying nWeight and
        // nThreshold.
        //
        // parameters:
        //      channel     channel used for the search; must not be null
        //      strDbName   database to search
        //      strFrom     access point name
        //      strKey      key to search for
        //      strSearchStyle  match style; null or "" means "exact"
        //      nWeight     weight stored on every item created
        //      nThreshold  threshold stored on every item created
        //      nMax        maximum hit count passed to the search (<maxCount>)
        //      strExcludeBiblioRecPath record path to leave out of the result
        //                  (null to exclude nothing)
        //      dupset      the collected result set. It stays null when the
        //                  search itself fails or finds nothing; once created it
        //                  is returned on every path, including -1, so the
        //                  caller is responsible for disposing it.
        //      strError    error text
        // return:
        //      -1  error
        //      0   not found
        //      1   found
        int SearchOneFrom(
            RmsChannel channel,
            string strDbName,
            string strFrom,
            string strKey,
            string strSearchStyle,
            int nWeight,
            int nThreshold,
            long nMax,
            string strExcludeBiblioRecPath,
            out DupResultSet dupset,
            out string strError)
        {
            strError = "";
            dupset = null;
            long lRet = 0;

            // Treat a missing style the same as an empty one.
            if (String.IsNullOrEmpty(strSearchStyle) == true)
                strSearchStyle = "exact";

            string strQueryXml = "<target list='"
                + StringUtil.GetXmlStringSimple(strDbName + ":" + strFrom)
                + "'><item><word>"
                + StringUtil.GetXmlStringSimple(strKey)
                + "</word><match>" + strSearchStyle + "</match><relation>=</relation><dataType>string</dataType><maxCount>" + nMax.ToString() + "</maxCount></item><lang>zh</lang></target>";

            Debug.Assert(channel != null, "");

            lRet = channel.DoSearch(strQueryXml,
                "dup",
                "", // strOuputStyle
                out strError);
            if (lRet == -1)
                goto ERROR1;

            if (lRet == 0)
                return 0;   // not found

            long lHitCount = lRet;

            long lStart = 0;
            long lPerCount = Math.Min(50, lHitCount);
            List<string> aPath = null;

            dupset = new DupResultSet();
            dupset.Open(false, getTempFileName);

            // Fetch the result set page by page and process each record.
            for (; ; )
            {
                // TODO: allow this loop to be interrupted

                lRet = channel.DoGetSearchResult(
                    "dup",   // strResultSetName
                    lStart,
                    lPerCount,
                    "zh",
                    null,   // stop
                    out aPath,
                    out strError);
                if (lRet == -1)
                    goto ERROR1;

                if (lRet == 0)
                {
                    strError = "未命中";
                    break;
                }

                // An empty page would leave lStart unchanged and the loop would
                // never terminate, so stop with what has been collected so far.
                if (aPath == null || aPath.Count == 0)
                    break;

                for (int i = 0; i < aPath.Count; i++)
                {
                    string strPath = aPath[i];

                    // Skip the record that started the duplicate check.
                    if (strPath == strExcludeBiblioRecPath)
                        continue;

                    DupLineItem item = new DupLineItem();
                    item.Path = strPath;
                    item.Weight = nWeight;
                    item.Threshold = nThreshold;
                    dupset.Add(item);
                }

                lStart += aPath.Count;
                if (lStart >= lHitCount || lPerCount <= 0)
                    break;
            }

            return 1;
        ERROR1:
            return -1;
        }
Пример #7
0
        // Runs a duplicate check for one bibliographic record.
        //
        // For every <database> of the selected project, each <accessPoint> is
        // searched with the keys extracted from the origin record; the per-key
        // result sets are OR-merged (weights of identical paths are summed by
        // DupResultSet.Merge) into one set per database, and those are
        // OR-merged into one overall set. The overall set is sorted with
        // DupResultSetSortStyle.OverThreshold and stored in
        // sessioninfo1.DupResultSet, replacing (and disposing) any previous one.
        //
        // A failed search for a single key is skipped rather than aborting the
        // whole check.
        //
        // parameters:
        //      sessioninfo1    only used to hold the resulting DupResultSet; it
        //                      must not be used to obtain channels -- use
        //                      'channel' for all search operations
        //      channel         channel used for key extraction and searching
        //      strOriginBiblioRecPath  path of the record that started the check
        //      strOriginBiblioRecXml   XML of the record that started the check
        //      strProjectName  name of the dup project. When empty, the default
        //                      project configured for the origin record's
        //                      database (//dup/default[@origin=...]) is used.
        //      strStyle        "includeoriginrecord" keeps the origin record in
        //                      the result (it is excluded by default); null is
        //                      treated as ""
        //      strUsedProjectName  the project name actually used
        // return:
        //      result.Value
        //      -1  error (result.ErrorInfo holds the message)
        //      0   not found
        //      other   number of hit records
        public LibraryServerResult SearchDup(
            SessionInfo sessioninfo1,
            RmsChannel channel,
            string strOriginBiblioRecPath,
            string strOriginBiblioRecXml,
            string strProjectName,
            string strStyle,
            out string strUsedProjectName)
        {
            string strError = "";
            int nRet = 0;
            strUsedProjectName = "";

            // Result set across all databases. Declared ahead of every
            // "goto ERROR1" so that the error exit can release it.
            DupResultSet alldatabase_set = null;

            LibraryServerResult result = new LibraryServerResult();

            strStyle = (strStyle == null) ? "" : strStyle.ToLower();
            bool bIncludeOriginRecord = StringUtil.IsInList("includeoriginrecord", strStyle);

            // Without an explicit project name, look up the default project
            // configured for the origin record's database.
            if (String.IsNullOrEmpty(strProjectName) == true)
            {
                if (String.IsNullOrEmpty(strOriginBiblioRecPath) == true)
                {
                    strError = "既没有给出查重方案名,也没有给出记录路径,无法进行查重";
                    goto ERROR1;
                }
                string strOriginBiblioDbName = ResPath.GetDbName(strOriginBiblioRecPath);

                XmlNode nodeDefault = this.LibraryCfgDom.DocumentElement.SelectSingleNode("//dup/default[@origin='" + strOriginBiblioDbName + "']");
                if (nodeDefault == null)
                {
                    strError = "在没有明确指定查重方案名的情况下,本希望通过相关书目库的缺省查重方案名进行查重。但目前系统没有为书目库 '" + strOriginBiblioDbName + "' 定义缺省查重方案名,无法进行查重";
                    goto ERROR1;
                }

                string strDefaultProjectName = DomUtil.GetAttr(nodeDefault, "project");
                if (String.IsNullOrEmpty(strDefaultProjectName) == true)
                {
                    strError = "书目库 '" + strOriginBiblioDbName + "' 的<default>元素中未定义project属性值";
                    goto ERROR1;
                }

                strProjectName = strDefaultProjectName;
            }

            strUsedProjectName = strProjectName;

            XmlNode nodeProject = null;
            // Get the definition node of the dup project
            // return:
            //      -1  error
            //      0   not found
            //      1   found
            nRet = GetDupProjectNode(strProjectName,
                out nodeProject,
                out strError);
            if (nRet == 0 || nRet == -1)
                goto ERROR1;

            Debug.Assert(nodeProject != null, "");

            XmlNodeList nodeDatabases = nodeProject.SelectNodes("database");

            // The key list depends only on the origin record, not on the
            // database being searched, so it is fetched once (on first use).
            List<AccessKeyInfo> aKeyLine = null;
            bool bKeysLoaded = false;

            // Search each database in turn.
            for (int i = 0; i < nodeDatabases.Count; i++)
            {
                XmlNode nodeDatabase = nodeDatabases[i];
                string strDatabaseName = DomUtil.GetAttr(nodeDatabase, "name");
                string strThreshold = DomUtil.GetAttr(nodeDatabase, "threshold");

                // A missing or malformed threshold counts as 0.
                int nThreshold = 0;
                if (Int32.TryParse(strThreshold, out nThreshold) == false)
                    nThreshold = 0;

                if (bKeysLoaded == false)
                {
                    // Simulate key creation to obtain the list of access keys
                    // return:
                    //      -1  error
                    //      0   succeed
                    nRet = GetKeys(
                        channel,
                        strOriginBiblioRecPath,
                        strOriginBiblioRecXml,
                        out aKeyLine,
                        out strError);
                    if (nRet == -1)
                        goto ERROR1;
                    bKeysLoaded = true;
                }

                DupResultSet onedatabase_set = null;    // result set of this database
                try
                {
                    XmlNodeList accesspoints = nodeDatabase.SelectNodes("accessPoint");
                    for (int j = 0; j < accesspoints.Count; j++)
                    {
                        XmlNode accesspoint = accesspoints[j];

                        string strFrom = DomUtil.GetAttr(accesspoint, "name");

                        // Keys of the origin record that belong to this from
                        List<string> keys = GetKeyByFrom(aKeyLine,
                            strFrom);
                        if (keys.Count == 0)
                            continue;

                        string strWeight = DomUtil.GetAttr(accesspoint, "weight");
                        string strSearchStyle = DomUtil.GetAttr(accesspoint, "searchStyle");

                        // A missing or malformed weight counts as 0.
                        int nWeight = 0;
                        if (Int32.TryParse(strWeight, out nWeight) == false)
                            nWeight = 0;

                        for (int k = 0; k < keys.Count; k++)
                        {
                            string strKey = keys[k];
                            if (String.IsNullOrEmpty(strKey) == true)
                                continue;

                            DupResultSet dupset = null;
                            try
                            {
                                // Search one from
                                // return:
                                //      -1  error
                                //      0   not found
                                //      1   found
                                nRet = SearchOneFrom(
                                    channel,
                                    strDatabaseName,
                                    strFrom,
                                    strKey,
                                    strSearchStyle,
                                    nWeight,
                                    nThreshold,
                                    5000,   // maximum hits per search
                                    (bIncludeOriginRecord == false) ? strOriginBiblioRecPath : null,
                                    out dupset,
                                    out strError);

                                // Best effort: a failed search for one key is
                                // skipped; the finally block releases any
                                // partially filled dupset.
                                if (nRet == -1)
                                    continue;

                                if (nRet == 0 || dupset == null)
                                    continue;

                                if (onedatabase_set == null)
                                {
                                    // Ownership moves to onedatabase_set; clear
                                    // dupset so the finally block leaves it alone.
                                    onedatabase_set = dupset;
                                    dupset = null;
                                    continue;
                                }

                                nRet = MergeDupSetByOr(ref onedatabase_set,
                                    dupset,
                                    out strError);
                                if (nRet == -1)
                                    goto ERROR1;
                            }
                            finally
                            {
                                if (dupset != null)
                                    dupset.Dispose();
                            }
                        } // end of k loop

                    } // end of j loop

                    if (onedatabase_set == null)
                        continue;

                    if (alldatabase_set == null)
                    {
                        // Ownership moves to alldatabase_set; clear
                        // onedatabase_set so the finally block leaves it alone.
                        alldatabase_set = onedatabase_set;
                        onedatabase_set = null;
                        continue;
                    }

                    nRet = MergeDupSetByOr(ref alldatabase_set,
                        onedatabase_set,
                        out strError);
                    if (nRet == -1)
                        goto ERROR1;
                }
                finally
                {
                    if (onedatabase_set != null)
                        onedatabase_set.Dispose();
                }
            }

            // Sort by the margin between Weight and Threshold for output.
            if (alldatabase_set != null)
            {
                alldatabase_set.SortStyle = DupResultSetSortStyle.OverThreshold;
                alldatabase_set.Sort(getTempFileName);
            }

            // Replace the session's previous result set with the new one.
            if (sessioninfo1.DupResultSet != null)
                sessioninfo1.DupResultSet.Dispose();
            sessioninfo1.DupResultSet = alldatabase_set;

            if (alldatabase_set != null)
                result.Value = alldatabase_set.Count;
            else
                result.Value = 0;
            return result;
        ERROR1:
            // The partial overall set was never handed to the session, so it
            // has to be released here.
            if (alldatabase_set != null)
                alldatabase_set.Dispose();
            result.Value = -1;
            result.ErrorCode = ErrorCode.SystemError;
            result.ErrorInfo = strError;
            return result;
        }

        // Replaces 'target' with the OR-merge of 'target' and 'addition'.
        // On success the previous 'target' is disposed and 'target' refers to
        // the merged set; 'addition' is left for the caller to dispose. On
        // failure (return -1 or an exception) 'target' is unchanged and the
        // temporary merged set is disposed.
        // return:
        //      -1  error
        //      0   succeed
        int MergeDupSetByOr(
            ref DupResultSet target,
            DupResultSet addition,
            out string strError)
        {
            strError = "";
            string strDebugInfo = "";

            // Merge() requires both sources to be indexed.
            target.EnsureCreateIndex(getTempFileName);
            addition.EnsureCreateIndex(getTempFileName);

            DupResultSet merged = new DupResultSet();
            bool bHandedOver = false;
            try
            {
                merged.Open(false, getTempFileName);

                int nRet = DupResultSet.Merge("OR",
                    target,
                    addition,
                    null,   // targetLeft
                    merged,
                    null,   // targetRight
                    false,
                    out strDebugInfo,
                    out strError);
                if (nRet == -1)
                    return -1;

                DupResultSet previous = target;
                target = merged;
                bHandedOver = true;
                previous.Dispose();
                return 0;
            }
            finally
            {
                if (bHandedOver == false)
                    merged.Dispose();
            }
        }