Beispiel #1
0
    ContractRec ExtractSingle()
    {
        contractType = String.Empty;
        foreach (var paragrah in root.Children)
        {
            foreach (var item in paragrah.Children)
            {
                if (item.Content.Contains("中标"))
                {
                    contractType = "中标";
                    break;
                }
                if (item.Content.Contains("合同"))
                {
                    contractType = "合同";
                    break;
                }
            }
            if (contractType != String.Empty)
            {
                break;
            }
        }

        if (contractType == String.Empty)
        {
            Console.WriteLine("contractType Null:" + Id);
        }

        var contract = new ContractRec();

        //公告ID
        contract.Id = Id;

        //乙方
        contract.YiFang = GetYiFang();
        if (contract.YiFang.Contains("本公司"))
        {
            contract.YiFang = string.Empty;
        }
        contract.YiFang = CompanyNameLogic.AfterProcessFullName(contract.YiFang).secFullName;
        contract.YiFang = contract.YiFang.NormalizeTextResult();
        //按照规定除去括号
        contract.YiFang = RegularTool.TrimBrackets(contract.YiFang);
        if (contract.YiFang.Length < 3)
        {
            contract.YiFang = string.Empty;
        }


        //甲方
        contract.JiaFang = GetJiaFang(contract.YiFang);
        if (contract.JiaFang.Contains("本公司"))
        {
            contract.JiaFang = string.Empty;
        }
        contract.JiaFang = CompanyNameLogic.AfterProcessFullName(contract.JiaFang).secFullName;
        contract.JiaFang = contract.JiaFang.NormalizeTextResult();
        if (contract.JiaFang.Contains("简称"))
        {
            contract.JiaFang = Utility.GetStringBefore(contract.JiaFang, "(");
        }
        //机构列表
        if (Nerlist != null)
        {
            var NiList = Nerlist.Where((n) => n.Type == LTPTrainingNER.enmNerType.Ni).Select((m) => m.RawData);
            if (!NiList.Contains(contract.JiaFang))
            {
                if (NiList.Contains("国家电网公司"))
                {
                    contract.JiaFang = "国家电网公司";
                }
            }
        }
        //项目
        contract.ProjectName = GetProjectName();
        contract.ProjectName = contract.ProjectName.NormalizeTextResult();
        if (contract.ProjectName.StartsWith("“") && contract.ProjectName.EndsWith("”"))
        {
            contract.ProjectName = contract.ProjectName.TrimStart("“".ToCharArray()).TrimEnd("”".ToCharArray());
        }
        if (contract.ProjectName.EndsWith(",签约双方"))
        {
            contract.ProjectName = Utility.GetStringAfter(contract.ProjectName, ",签约双方");
        }
        if (contract.ProjectName.Contains("(以下简称"))
        {
            contract.ProjectName = Utility.GetStringAfter(contract.ProjectName, "(以下简称");
        }
        if (contract.ProjectName.EndsWith(")"))
        {
            if (contract.ProjectName.Contains("(招标编号"))
            {
                contract.ProjectName = Utility.GetStringBefore(contract.ProjectName, "(招标编号");
            }
            if (contract.ProjectName.Contains("(合同编号"))
            {
                contract.ProjectName = Utility.GetStringBefore(contract.ProjectName, "(合同编号");
            }
        }
        contract.ProjectName = contract.ProjectName.Replace("的推荐中标", "");
        //特殊处理
        contract.ProjectName = contract.ProjectName.Replace("<1>", "1、");
        contract.ProjectName = contract.ProjectName.Replace("“", "");
        contract.ProjectName = contract.ProjectName.Replace("”", "");

        //合同名
        contract.ContractName = GetContractName();
        if (contract.ContractName.StartsWith("“") && contract.ContractName.EndsWith("”"))
        {
            contract.ContractName = contract.ContractName.TrimStart("“".ToCharArray()).TrimEnd("”".ToCharArray());
        }
        //去掉书名号
        contract.ContractName = contract.ContractName.Replace("《", String.Empty).Replace("》", String.Empty);
        contract.ContractName = contract.ContractName.NormalizeTextResult();
        if (contract.ContractName.Contains("(以下简称"))
        {
            contract.ContractName = Utility.GetStringAfter(contract.ContractName, "(以下简称");
        }
        contract.ContractName = ExtendContractName(contract.ContractName);

        //如果是采购协议,则工程名清空
        if (contract.ContractName.Contains("采购"))
        {
            if (contract.ProjectName.Contains("标段"))
            {
                //TODO:
            }
            else
            {
                contract.ProjectName = string.Empty;
            }
        }

        //金额
        var money = GetMoney();

        contract.ContractMoneyUpLimit   = MoneyUtility.Format(money.MoneyAmount, String.Empty);
        contract.ContractMoneyDownLimit = contract.ContractMoneyUpLimit;

        //联合体
        contract.UnionMember = GetUnionMember(contract);
        contract.UnionMember = contract.UnionMember.NormalizeTextResult();
        //按照规定除去括号
        contract.UnionMember = RegularTool.TrimBrackets(contract.UnionMember);

        var YiFangArray = contract.YiFang.Split(Utility.SplitChar);

        if (YiFangArray.Length > 1)
        {
            contract.UnionMember = Utility.GetStringAfter(contract.YiFang, Utility.SplitChar);
            contract.YiFang      = YiFangArray[0];
            Console.WriteLine("联合体:" + contract.UnionMember);
        }
        return(contract);
    }
Beispiel #2
0
    struContract ExtractSingle(MyRootHtmlNode root, String Id)
    {
        contractType = String.Empty;
        foreach (var paragrah in root.Children)
        {
            foreach (var item in paragrah.Children)
            {
                if (item.Content.Contains("中标"))
                {
                    contractType = "中标";
                    break;
                }
                if (item.Content.Contains("合同"))
                {
                    contractType = "合同";
                    break;
                }
            }
            if (contractType != String.Empty)
            {
                break;
            }
        }

        if (contractType == String.Empty)
        {
            Console.WriteLine("contractType Null:" + Id);
        }

        var contract = new struContract();

        //公告ID
        contract.id = Id;
        //甲方
        contract.JiaFang = GetJiaFang();
        contract.JiaFang = CompanyNameLogic.AfterProcessFullName(contract.JiaFang).secFullName;
        contract.JiaFang = contract.JiaFang.NormalizeTextResult();
        if (!Nerlist.Contains(contract.JiaFang))
        {
            //作为特殊单位,国家电网公司一般都是甲方
            if (Nerlist.Contains("国家电网公司"))
            {
                contract.JiaFang = "国家电网公司";
            }
        }

        //乙方
        contract.YiFang = GetYiFang();
        contract.YiFang = CompanyNameLogic.AfterProcessFullName(contract.YiFang).secFullName;
        contract.YiFang = contract.YiFang.NormalizeTextResult();
        //按照规定除去括号
        contract.YiFang = RegularTool.TrimBrackets(contract.YiFang);


        //项目
        contract.ProjectName = GetProjectName();
        if (contract.ProjectName.StartsWith("“") && contract.ProjectName.EndsWith("”"))
        {
            contract.ProjectName = contract.ProjectName.TrimStart("“".ToCharArray()).TrimEnd("”".ToCharArray());
        }
        if (contract.ProjectName.EndsWith(",签约双方"))
        {
            contract.ProjectName = Utility.GetStringAfter(contract.ProjectName, ",签约双方");
        }
        if (contract.ProjectName.Contains("(以下简称"))
        {
            contract.ProjectName = Utility.GetStringAfter(contract.ProjectName, "(以下简称");
        }
        contract.ProjectName = contract.ProjectName.NormalizeTextResult();

        //合同
        if (contractType == "中标")
        {
            //按照数据分析来看,应该工程名 在中标的时候填写,合同名在合同的时候填写
            contract.ContractName = String.Empty;
        }
        else
        {
            contract.ContractName = GetContractName();
            if (contract.ContractName.StartsWith("“") && contract.ContractName.EndsWith("”"))
            {
                contract.ContractName = contract.ContractName.TrimStart("“".ToCharArray()).TrimEnd("”".ToCharArray());
            }
            //去掉书名号
            contract.ContractName = contract.ContractName.Replace("《", String.Empty).Replace("》", String.Empty);
            if (contract.ContractName.Contains("(以下简称"))
            {
                contract.ContractName = Utility.GetStringAfter(contract.ContractName, "(以下简称");
            }
            contract.ContractName = contract.ContractName.NormalizeTextResult();
        }


        //金额
        var money = GetMoney();

        contract.ContractMoneyUpLimit   = MoneyUtility.Format(money.MoneyAmount, String.Empty);
        contract.ContractMoneyDownLimit = contract.ContractMoneyUpLimit;

        //联合体
        contract.UnionMember = GetUnionMember(contract.JiaFang, contract.YiFang);
        contract.UnionMember = contract.UnionMember.NormalizeTextResult();
        //按照规定除去括号
        contract.UnionMember = RegularTool.TrimBrackets(contract.UnionMember);
        return(contract);
    }
Beispiel #3
0
    /// <summary>
    /// 获得甲方
    /// </summary>
    /// <returns></returns>
    public string GetJiaFang()
    {
        //最高置信度的抽取
        EntityProperty e = new EntityProperty();

        e.ExcludeContainsWordList = new string[] { "招标代理" };
        e.LeadingColonKeyWordList = new string[] {
            "甲方:", "合同买方:",
            "发包人:", "发包单位:", "发包方:", "发包机构:", "发包人名称:",
            "招标人:", "招标单位:", "招标方:", "招标机构:", "招标人名称:",
            "业主:", "业主单位:", "业主方:", "业主机构:", "业主名称:",
            "采购单位:", "采购单位名称:", "采购人:", "采购人名称:", "采购方:", "采购方名称:"
        };
        e.CandidatePreprocess = (x =>
        {
            x = Normalizer.ClearTrailing(x);
            return(CompanyNameLogic.AfterProcessFullName(x).secFullName);
        });
        e.MaxLength = ContractTraning.JiaFangES.MaxLength;
        e.MaxLengthCheckPreprocess = Utility.TrimEnglish;
        e.MinLength = 3;
        e.Extract(this);

        //这里不直接做Distinct,出现频次越高,则可信度越高
        //多个甲方的时候,可能意味着没有甲方!
        if (e.LeadingColonKeyWordCandidate.Distinct().Count() > 1)
        {
            foreach (var candidate in e.LeadingColonKeyWordCandidate)
            {
                Program.Logger.WriteLine("发现多个甲方:" + candidate);
            }
        }
        if (e.LeadingColonKeyWordCandidate.Count > 0)
        {
            return(e.LeadingColonKeyWordCandidate[0]);
        }


        //招标
        var Extractor     = new ExtractPropertyByHTML();
        var CandidateWord = new List <String>();
        var StartArray    = new string[] { "招标单位", "业主", "收到", "接到" };
        var EndArray      = new string[] { "发来", "发出", "的中标" };

        Extractor.StartEndFeature = Utility.GetStartEndStringArray(StartArray, EndArray);
        Extractor.Extract(root);
        foreach (var item in Extractor.CandidateWord)
        {
            var JiaFang = CompanyNameLogic.AfterProcessFullName(item.Value.Trim());
            if (JiaFang.secFullName.Contains("招标代理"))
            {
                continue;                                       //特殊业务规则
            }
            JiaFang.secFullName = JiaFang.secFullName.Replace("业主", String.Empty).Trim();
            JiaFang.secFullName = JiaFang.secFullName.Replace("招标单位", String.Empty).Trim();
            if (Utility.TrimEnglish(JiaFang.secFullName).Length > ContractTraning.JiaFangES.MaxLength)
            {
                continue;
            }
            if (JiaFang.secFullName.Length < 3)
            {
                continue;                                     //使用实际长度排除全英文的情况
            }
            if (!Program.IsMultiThreadMode)
            {
                Program.Logger.WriteLine("甲方候补词(招标):[" + JiaFang.secFullName + "]");
            }
            CandidateWord.Add(JiaFang.secFullName);
        }

        //合同
        Extractor  = new ExtractPropertyByHTML();
        StartArray = new string[] { "与", "与业主" };
        EndArray   = new string[] { "签署", "签订" };
        Extractor.StartEndFeature = Utility.GetStartEndStringArray(StartArray, EndArray);
        Extractor.Extract(root);
        foreach (var item in Extractor.CandidateWord)
        {
            var JiaFang = CompanyNameLogic.AfterProcessFullName(item.Value.Trim());
            JiaFang.secFullName = JiaFang.secFullName.Replace("业主", String.Empty).Trim();
            if (JiaFang.secFullName.Contains("招标代理"))
            {
                continue;                                       //特殊业务规则
            }
            if (Utility.TrimEnglish(JiaFang.secFullName).Length > ContractTraning.JiaFangES.MaxLength)
            {
                continue;
            }
            if (JiaFang.secFullName.Length < 3)
            {
                continue;                                     //使用实际长度排除全英文的情况
            }
            if (!Program.IsMultiThreadMode)
            {
                Program.Logger.WriteLine("甲方候补词(合同):[" + JiaFang.secFullName + "]");
            }
            CandidateWord.Add(JiaFang.secFullName);
        }
        return(CompanyNameLogic.MostLikeCompanyName(CandidateWord));
    }
Beispiel #4
0
    /// <summary>
    /// 获得甲方
    /// </summary>
    /// <returns></returns>
    string GetJiaFang(String YiFang)
    {
        //最高置信度的抽取
        EntityProperty e = new EntityProperty();

        e.ExcludeContainsWordList = new string[] { "招标代理" };
        e.LeadingColonKeyWordList = new string[] {
            "甲方:", "合同买方:",
            "发包人:", "发包单位:", "发包方:", "发包机构:", "发包人名称:",
            "招标人:", "招标单位:", "招标方:", "招标机构:", "招标人名称:", "项目招标人:",
            "业主:", "业主单位:", "业主方:", "业主机构:", "业主名称:",
            "采购单位:", "采购单位名称:", "采购人:", "采购人名称:", "采购方:", "采购方名称:"
        };
        e.CandidatePreprocess = (x =>
        {
            x = Normalizer.ClearTrailing(x);
            return(CompanyNameLogic.AfterProcessFullName(x).secFullName);
        });
        e.MaxLength = 32;
        e.MaxLengthCheckPreprocess = Utility.TrimEnglish;
        e.MinLength = 3;
        e.Extract(this);
        //这里不直接做Distinct,出现频次越高,则可信度越高
        //多个甲方的时候,可能意味着没有甲方!
        if (e.LeadingColonKeyWordCandidate.Distinct().Count() > 1)
        {
            foreach (var candidate in e.LeadingColonKeyWordCandidate)
            {
                Program.Logger.WriteLine("发现多个甲方:" + candidate);
            }
        }
        if (e.LeadingColonKeyWordCandidate.Count > 0)
        {
            return(e.LeadingColonKeyWordCandidate[0]);
        }

        var ner    = SearchJiaFang();
        var NerJia = String.Empty;

        if (!String.IsNullOrEmpty(ner))
        {
            foreach (var cn in companynamelist)
            {
                if (cn.secShortName == ner)
                {
                    ner = cn.secFullName;
                }
            }
            if (String.IsNullOrEmpty(YiFang))
            {
                NerJia = ner;
            }
            if (!YiFang.Equals(ner))
            {
                NerJia = ner;
            }
        }

        //招标
        var Extractor     = new ExtractPropertyByHTML();
        var CandidateWord = new List <String>();
        var StartArray    = new string[] { "招标单位", "业主", "收到", "接到" };
        var EndArray      = new string[] { "发来", "发出", "的中标" };

        Extractor.StartEndFeature = Utility.GetStartEndStringArray(StartArray, EndArray);
        Extractor.Extract(root);
        foreach (var item in Extractor.CandidateWord)
        {
            var JiaFang = CompanyNameLogic.AfterProcessFullName(item.Value.Trim());
            if (JiaFang.secFullName.Contains("招标代理"))
            {
                continue;                                       //特殊业务规则
            }
            JiaFang.secFullName = JiaFang.secFullName.Replace("业主", String.Empty).Trim();
            JiaFang.secFullName = JiaFang.secFullName.Replace("招标单位", String.Empty).Trim();
            if (Utility.TrimEnglish(JiaFang.secFullName).Length > 32)
            {
                continue;
            }
            if (JiaFang.secFullName.Length < 3)
            {
                continue;                                     //使用实际长度排除全英文的情况
            }
            if (!Program.IsMultiThreadMode)
            {
                Program.Logger.WriteLine("甲方候补词(招标):[" + JiaFang.secFullName + "]");
            }
            CandidateWord.Add(JiaFang.secFullName);
        }

        //合同
        Extractor  = new ExtractPropertyByHTML();
        StartArray = new string[] { "与", "与业主" };
        EndArray   = new string[] { "签署", "签订" };
        Extractor.StartEndFeature = Utility.GetStartEndStringArray(StartArray, EndArray);
        Extractor.Extract(root);
        foreach (var item in Extractor.CandidateWord)
        {
            var JiaFang = CompanyNameLogic.AfterProcessFullName(item.Value.Trim());
            JiaFang.secFullName = JiaFang.secFullName.Replace("业主", String.Empty).Trim();
            if (JiaFang.secFullName.Contains("招标代理"))
            {
                continue;                                       //特殊业务规则
            }
            if (Utility.TrimEnglish(JiaFang.secFullName).Length > 32)
            {
                continue;
            }
            if (JiaFang.secFullName.Length < 3)
            {
                continue;                                     //使用实际长度排除全英文的情况
            }
            if (!Program.IsMultiThreadMode)
            {
                Program.Logger.WriteLine("甲方候补词(合同):[" + JiaFang.secFullName + "]");
            }
            CandidateWord.Add(JiaFang.secFullName);
        }
        if (!String.IsNullOrEmpty(NerJia))
        {
            //原则上,有NER中提取的甲方,则使用甲方
            foreach (var c in CandidateWord)
            {
                //但是,这里有可能是正确的解答,例如
                //NER:(集团)有限公司 实际上应该是 XXXX(集团)有限公司
                if (c.EndsWith(NerJia))
                {
                    return(c);
                }
            }
            return(NerJia);
        }
        else
        {
            return(CompanyNameLogic.MostLikeCompanyName(CandidateWord));
        }
    }