Example #1
0
    /// <summary>
    /// Extracts the project name ("工程名称") for the current document. Three strategies
    /// are tried in order and the first non-empty result is returned.
    /// </summary>
    /// <returns>The project name, or <see cref="string.Empty"/> when nothing matched.</returns>
    string GetProjectName()
    {
        var extractor = new EntityProperty
        {
            PropertyName            = "工程名称",
            LeadingColonKeyWordList = new string[] { "项目名称:", "工程名称:", "中标项目:", "合同标的:", "工程内容:" },
            LeadingColonKeyWordCandidatePreprocess  = TrimEndJianCheng,
            QuotationTrailingWordList_IsSkipBracket = true,
            QuotationTrailingWordList = new string[] {
                "标段施工项目", "标段土建工程", "标段施工总承包", "标段的工程", "标段工程", "标段施工总价承包",
                "标段施工总承包工程", "标段施工工程", "标段土建工程建设项目", "标段站前工程", "标段工程(施工)",
                "工程施工工程", "项目施工工程", "施工工程",
                "工程项目", "工程标段", "标段的施工项目", "标段项目", "标段施工",
                "招标采购项目", "招标活动", "采购活动", "招标项目",
                "项目", "采购", "总承包",
                "工程", "标段", "标",
            },
        };

        // Strategy 1: colon-keyword and quotation-trailing-word configuration only.
        extractor.Extract(this);
        var projectName = extractor.EvaluateCI();
        if (!String.IsNullOrEmpty(projectName))
        {
            return projectName;
        }

        // Strategy 2: re-run the same extractor with start/end string features added.
        extractor.ExternalStartEndStringFeature = Utility.GetStartEndStringArray(
            new string[] { "公司为", "参与了", "确定为" },
            new string[] { "的中标单位", "的公开招投标", "的中标人", "候选人" });
        extractor.Extract(this);
        projectName = extractor.EvaluateCI();
        if (!String.IsNullOrEmpty(projectName))
        {
            // Prefer the "…项目" form when FindWordCnt reports at least one hit for it under root.
            var withSuffix = projectName + "项目";
            return ExtractPropertyByHTML.FindWordCnt(withSuffix, root).Count >= 1
                ? withSuffix
                : projectName;
        }

        // Strategy 3: take the text preceding the notice marker in the first quotation containing it.
        const string noticeMarker = "推荐的中标候选人公示";
        foreach (var quotation in quotationList)
        {
            if (quotation.Value.Contains(noticeMarker))
            {
                return Utility.GetStringBefore(quotation.Value, noticeMarker);
            }
        }
        return string.Empty;
    }
Example #2
0
    /// <summary>
    /// Extracts a project name from <paramref name="root"/>: keyword-led candidates first,
    /// then mark-feature candidates, then <c>BussinessLogic.GetProjectName</c>.
    /// </summary>
    /// <param name="root">Root HTML node handed to the extractors.</param>
    /// <returns>The first accepted candidate (trimmed), or an empty string.</returns>
    static string GetProjectName(MyRootHtmlNode root)
    {
        var extractor = new EntityProperty();

        // Pass 1: text following one of these labels.
        extractor.LeadingWordList = new string[] { "项目名称:", "工程名称:", "中标项目:", "合同标的:", "工程内容:" };
        extractor.Extract(root);
        foreach (var candidate in extractor.CandidateWord)
        {
            var name = candidate.Trim();
            // Candidates whose TrimEnglish length exceeds the configured maximum are skipped.
            if (EntityWordAnlayzeTool.TrimEnglish(name).Length <= ContractTraning.MaxContractNameLength)
            {
                Program.Logger.WriteLine("项目名称候补词(关键字):[" + candidate + "]");
                return name;
            }
        }

        // Pass 2: the same extractor instance (LeadingWordList is still set) with two
        // “…” mark features whose inner text ends with "标段" or "标".
        extractor.MarkFeature = new EntityProperty.struMarkFeature[]
        {
            new EntityProperty.struMarkFeature { MarkStartWith = "“", MarkEndWith = "”", InnerEndWith = "标段" },
            new EntityProperty.struMarkFeature { MarkStartWith = "“", MarkEndWith = "”", InnerEndWith = "标" },
        };
        extractor.Extract(root);
        foreach (var candidate in extractor.CandidateWord)
        {
            var name = candidate.Trim();
            if (EntityWordAnlayzeTool.TrimEnglish(name).Length <= ContractTraning.MaxContractNameLength)
            {
                Program.Logger.WriteLine("工程名称候补词(《XXX》):[" + candidate + "]");
                return name;
            }
        }

        // Pass 3: fall back to the first entry produced by BussinessLogic.
        var fallback = BussinessLogic.GetProjectName(root);
        if (fallback.Count == 0)
        {
            return "";
        }
        return fallback[0];
    }
Example #3
0
    /// <summary>
    /// Extracts the project name ("工程名称") for the current document in a single
    /// extraction pass, falling back to the quotation list.
    /// </summary>
    /// <returns>The project name, or <see cref="string.Empty"/> when nothing matched.</returns>
    string GetProjectName()
    {
        var extractor = new EntityProperty
        {
            PropertyName            = "工程名称",
            LeadingColonKeyWordList = new string[] { "项目名称:", "工程名称:", "中标项目:", "合同标的:", "工程内容:" },
            LeadingColonKeyWordCandidatePreprocess = TrimEndJianCheng,
            QuotationTrailingWordList = new string[] { "工程", "标段", "标", "招标活动", "项目", "采购" },
            ExternalStartEndStringFeature = Utility.GetStartEndStringArray(
                new string[] { "公司为", "参与了", "确定为" },
                new string[] { "的中标单位", "的公开招投标", "的中标人", "候选人" }),
        };
        extractor.Extract(this);

        var projectName = extractor.EvaluateCI();
        if (!String.IsNullOrEmpty(projectName))
        {
            return projectName;
        }

        // Fallback: text preceding the notice marker in the first quotation that contains it.
        const string noticeMarker = "推荐的中标候选人公示";
        foreach (var quotation in quotationList)
        {
            if (quotation.Value.Contains(noticeMarker))
            {
                return Utility.GetStringBefore(quotation.Value, noticeMarker);
            }
        }
        return string.Empty;
    }
Example #4
0
    /// <summary>
    /// Extracts a money amount from the text following one of the amount labels.
    /// </summary>
    /// <param name="root">Root HTML node handed to the extractor.</param>
    /// <returns>
    /// <c>Item1</c> of the first entry whose <c>Item2</c> is "人民币" or "元"; otherwise
    /// <c>Item1</c> of the first entry found; an empty string when nothing was found.
    /// </returns>
    static string GetMoney(HTMLEngine.MyRootHtmlNode root)
    {
        var extractor = new EntityProperty();

        // Text following these labels.
        extractor.LeadingWordList = new string[] { "中标金额", "中标价", "合同金额", "合同总价", "订单总金额" };
        extractor.Extract(root);

        // Collect every tuple SeekMoney reports across all candidates.
        var found = new List<Tuple<String, String>>();
        foreach (var candidate in extractor.CandidateWord)
        {
            found.AddRange(Utility.SeekMoney(candidate));
        }
        if (found.Count == 0)
        {
            return "";
        }

        var amount = "";
        foreach (var entry in found)
        {
            if (entry.Item2 == "人民币" || entry.Item2 == "元")
            {
                amount = entry.Item1;
                break;
            }
        }
        if (amount == "")
        {
            // No entry with a matching Item2 (or its Item1 was empty): use the first entry.
            amount = found[0].Item1;
        }
        Program.Logger.WriteLine("金额候补词:[" + amount + "]");

        return amount;
    }
Example #5
0
    /// <summary>
    /// Extracts the contract name ("合同名称") for the current document.
    /// </summary>
    /// <returns>The value returned by <c>EvaluateCI()</c> on the configured extractor.</returns>
    string GetContractName()
    {
        var extractor = new EntityProperty();

        extractor.PropertyName              = "合同名称";
        extractor.PropertyType              = EntityProperty.enmType.NER;
        extractor.MaxLength                 = ContractTraning.ContractES.MaxLength;
        extractor.MinLength                 = ContractTraning.ContractES.MinLength;
        extractor.LeadingColonKeyWordList   = new string[] { "合同名称:" };
        extractor.QuotationTrailingWordList = new string[] { "协议书", "合同书", "确认书", "合同", "协议" };
        extractor.QuotationTrailingWordList_IsSkipBracket = true;   // only true is supported for now

        // Dependency-parsing key words: start words were obtained from SRL training.
        extractor.DpKeyWordList = new List<ExtractPropertyByDP.DPKeyWord>
        {
            new ExtractPropertyByDP.DPKeyWord()
            {
                StartWord    = new string[] { "签署", "签订" },
                StartDPValue = new string[] { LTPTrainingDP.核心关系, LTPTrainingDP.定中关系, LTPTrainingDP.并列关系 },
                EndWord      = new string[] { "补充协议", "合同书", "合同", "协议书", "协议", },
                EndDPValue   = new string[] { LTPTrainingDP.核心关系, LTPTrainingDP.定中关系, LTPTrainingDP.并列关系, LTPTrainingDP.动宾关系, LTPTrainingDP.主谓关系 }
            }
        };

        // Start/end strings obtained from context training; the end word "合同" is
        // appended back onto each candidate by the preprocessor below.
        extractor.ExternalStartEndStringFeature = Utility.GetStartEndStringArray(
            new string[] { "签署了", "签订了" },
            new string[] { "合同" });
        extractor.ExternalStartEndStringFeatureCandidatePreprocess = x => x + "合同";
        extractor.MaxLengthCheckPreprocess = str => Utility.TrimEnglish(str);

        // Highest confidence level: dedicated preprocessor for colon-keyword candidates.
        extractor.LeadingColonKeyWordCandidatePreprocess =
            str => Normalizer.ClearTrailing(TrimEndJianCheng(str));

        extractor.CandidatePreprocess = str =>
        {
            var cleaned        = Normalizer.ClearTrailing(TrimEndJianCheng(str));
            var closingQuoteAt = cleaned.IndexOf("”");
            // For names like “XXX”合同 the closing quote exists but is not the last
            // character; in that case the opening quote is kept.
            if (closingQuoteAt == -1 || closingQuoteAt == cleaned.Length - 1)
            {
                cleaned = cleaned.TrimStart('“');
            }
            return cleaned.TrimStart('《').TrimEnd('》').TrimEnd('”');
        };

        extractor.ExcludeContainsWordList = new string[] { "日常经营重大合同" };
        // Original author's note: this list is weakly justified; the proper approach
        // would be to handle the "not yet signed" (尚未签署) case.
        extractor.ExcludeEqualsWordList = new string[] { "若干项重大合同", "中标合同", "正式合同", "合同", "重大合同", "项目合同", "终止协议", "经营合同", "特别重大合同", "相关项目合同" };
        extractor.Extract(this);

        // Original author's note: colon-keyword candidates take priority.
        return extractor.EvaluateCI();
    }
Example #6
0
    /// <summary>
    /// Extracts the contract name ("合同名称") for the current document, using fixed
    /// length bounds (4 to 200) and an extended list of quotation trailing words.
    /// </summary>
    /// <returns>The value returned by <c>EvaluateCI()</c> on the configured extractor.</returns>
    string GetContractName()
    {
        var extractor = new EntityProperty();

        extractor.PropertyName              = "合同名称";
        extractor.PropertyType              = EntityProperty.enmType.NER;
        extractor.MaxLength                 = 200;
        extractor.MinLength                 = 4;
        extractor.LeadingColonKeyWordList   = new string[] { "合同名称:" };
        extractor.QuotationTrailingWordList = new string[] {
            "商务合同补充协议", "承包合同补充协议", "补充协议", "经营合同补充协议",
            "协议书", "合同书", "确认书", "合同", "协议"
        };
        extractor.QuotationTrailingWordList_IsSkipBracket = false;

        // Start/end strings obtained from context training; the end word "合同" is
        // appended back onto each candidate by the preprocessor below.
        extractor.ExternalStartEndStringFeature = Utility.GetStartEndStringArray(
            new string[] { "签署了", "签订了" },
            new string[] { "合同" });
        extractor.ExternalStartEndStringFeatureCandidatePreprocess = x => x + "合同";
        extractor.MaxLengthCheckPreprocess = str => Utility.TrimEnglish(str);

        // Highest confidence level: dedicated preprocessor for colon-keyword candidates.
        extractor.LeadingColonKeyWordCandidatePreprocess =
            str => Normalizer.ClearTrailing(TrimEndJianCheng(str));

        extractor.CandidatePreprocess = str =>
        {
            var cleaned        = Normalizer.ClearTrailing(TrimEndJianCheng(str));
            var closingQuoteAt = cleaned.IndexOf("”");
            var quoteInMiddle  = closingQuoteAt != -1 && closingQuoteAt != cleaned.Length - 1;
            if (!quoteInMiddle)
            {
                // Only strip the opening quote when the closing quote is absent or last;
                // names like “XXX”合同 keep their opening quote.
                cleaned = cleaned.TrimStart('“');
            }
            cleaned = cleaned.TrimStart('《');
            cleaned = cleaned.TrimEnd('》');
            return cleaned.TrimEnd('”');
        };

        extractor.ExcludeContainsWordList = new string[] { "日常经营重大合同" };
        // Original author's note: this list is weakly justified; the proper approach
        // would be to handle the "not yet signed" (尚未签署) case.
        extractor.ExcludeEqualsWordList = new string[] { "若干项重大合同", "中标合同", "正式合同", "合同", "重大合同", "项目合同", "终止协议", "经营合同", "特别重大合同", "相关项目合同" };
        extractor.Extract(this);

        // Original author's note: colon-keyword candidates take priority.
        return extractor.EvaluateCI();
    }
Example #7
0
    /// <summary>
    /// Determines the subscription method (认购方式) via a keyword map.
    /// </summary>
    /// <param name="root">Not used by this method; extraction runs against <c>this</c>.</param>
    /// <returns>The word-map results joined with <c>Utility.SplitChar</c>.</returns>
    string getBuyMethod(HTMLEngine.MyRootHtmlNode root)
    {
        var property = new EntityProperty();

        // Map the keyword "现金认购" to the value "现金".
        property.KeyWordMap.Add("现金认购", "现金");
        property.Extract(this);

        var joined = string.Join(Utility.SplitChar, property.WordMapResult);
        if (!Program.IsMultiThreadMode)
        {
            // Logging is skipped in multi-thread mode.
            Program.Logger.WriteLine("认购方式:" + joined);
        }
        return joined;
    }
Example #8
0
    /// <summary>
    /// Extracts a company name from text that is followed by "公司董事会".
    /// </summary>
    /// <param name="root">Root HTML node handed to the extractor.</param>
    /// <returns>
    /// The first candidate after the candidate list has been reversed, or an empty
    /// string when there is none.
    /// </returns>
    public static string GetCompanyFullName(HTMLEngine.MyRootHtmlNode root)
    {
        var extractor = new EntityProperty
        {
            TrailingWordList = new string[] { "公司董事会" }
        };
        extractor.Extract(root);

        // Reverse so that iteration starts from what was the last candidate.
        extractor.CandidateWord.Reverse();
        foreach (var candidate in extractor.CandidateWord)
        {
            // NOTE(review): the log line appends "公司" but the returned value does not —
            // confirm which form callers expect.
            Program.Logger.WriteLine("全称:[" + candidate + "公司]");
            return candidate;
        }
        return "";
    }
Example #9
0
    /// <summary>
    /// Extracts the end date of the change period: text between "截止"/"截至" and "日".
    /// </summary>
    /// <param name="root">Root HTML node handed to the extractor.</param>
    /// <returns>
    /// The first candidate with "日" appended, passed through
    /// <c>Normalizer.NormailizeDate</c>; an empty string when there is no candidate.
    /// </returns>
    static string GetChangeEndDate(HTMLEngine.MyRootHtmlNode root)
    {
        var extractor = new EntityProperty
        {
            StartEndFeature = Utility.GetStartEndStringArray(
                new string[] { "截止", "截至" },
                new string[] { "日" })
        };
        extractor.Extract(root);

        foreach (var candidate in extractor.CandidateWord)
        {
            Program.Logger.WriteLine("候补变动截止日期:[" + candidate + "]");
            // "日" is appended back before the date is normalized.
            return Normalizer.NormailizeDate(candidate + "日");
        }
        return "";
    }
Example #10
0
    /// <summary>
    /// Determines the subscription method (认购方式) via a keyword map.
    /// </summary>
    /// <param name="root">Not used by this method; extraction runs against <c>this</c>.</param>
    /// <returns>The extractor's <c>WordMapResult</c>.</returns>
    string getBuyMethod(HTMLEngine.MyRootHtmlNode root)
    {
        var property = new EntityProperty();

        // Map the keyword "现金认购" to the value "现金".
        property.KeyWordMap.Add("现金认购", "现金");
        property.Extract(this);

        // Log only non-empty results, and only outside multi-thread mode.
        if (!String.IsNullOrEmpty(property.WordMapResult) && !Program.IsMultiThreadMode)
        {
            Program.Logger.WriteLine("认购方式:" + property.WordMapResult);
        }
        return property.WordMapResult;
    }
Example #11
0
    /// <summary>
    /// Extracts the company short name from text following "股票简称" or "证券简称"
    /// (fixed collocation, e.g. "股票简称:东方电气").
    /// </summary>
    /// <param name="root">Root HTML node handed to the extractor.</param>
    /// <returns>
    /// The first candidate with colons removed, trimmed, and cut at each terminator in
    /// turn; an empty string when there is no candidate.
    /// </returns>
    public static string GetCompanyShortName(HTMLEngine.MyRootHtmlNode root)
    {
        var Extractor = new EntityProperty();

        Extractor.LeadingWordList = new string[] { "股票简称", "证券简称" };
        Extractor.Extract(root);

        // Terminators are applied in this order; each cut works on the result of the
        // previous one. A terminator whose GetStringBefore result is "" is skipped.
        var terminators = new string[] { "、", ")", "公告", "股票", "证券", " " };

        foreach (var item in Extractor.CandidateWord)
        {
            // Strip both colon forms. The original chained two identical Replace(":", "")
            // calls, so the second was a no-op and a full-width colon (U+FF1A) was never
            // removed; the escape is used so the literal survives text normalization.
            var ShortName = item.Replace(":", "").Replace("\uFF1A", "").Trim();
            foreach (var terminator in terminators)
            {
                var head = Utility.GetStringBefore(ShortName, terminator);
                if (head != "")
                {
                    ShortName = head;
                }
            }
            FDDC.Program.Logger.WriteLine("简称:[" + ShortName + "]");
            // Only the first candidate is ever used.
            return ShortName;
        }
        return "";
    }
Example #12
0
    /// <summary>
    /// Determines party B (乙方): labelled text first, then text preceding
    /// "有限公司董事会", then the last entry of <c>companynamelist</c>.
    /// </summary>
    /// <param name="root">Root HTML node handed to the extractors.</param>
    /// <returns>The chosen name, or an empty string when no source yields one.</returns>
    static string GetYiFang(HTMLEngine.MyRootHtmlNode root)
    {
        // Pass 1: text following these labels. Other labels listed by the original
        // author but not enabled: "中标单位:","中标人:","乙方(供方):","承包人:",
        // "承包方:","中标方:","中标人名称:".
        var byLabel = new EntityProperty();
        byLabel.LeadingWordList = new string[] { "供应商名称:", "乙方:" };
        byLabel.Extract(root);
        foreach (var candidate in byLabel.CandidateWord)
        {
            Program.Logger.WriteLine("乙方候补词(关键字):[" + candidate + "]");
            return candidate.Trim();
        }

        // Pass 2: text preceding "有限公司董事会", iterated in reversed order.
        var byBoard = new EntityProperty();
        byBoard.TrailingWordList = new string[] { "有限公司董事会" };
        byBoard.Extract(root);
        byBoard.CandidateWord.Reverse();
        foreach (var candidate in byBoard.CandidateWord)
        {
            // A subsidiary, when one is known, takes precedence over the candidate.
            foreach (var company in companynamelist)
            {
                if (company.isSubCompany)
                {
                    return company.secFullName;
                }
            }
            Program.Logger.WriteLine("乙方候补词(关键字):[" + candidate + "有限公司]");
            return candidate.Trim() + "有限公司";
        }

        // Pass 3: last known company name, if any.
        if (companynamelist.Count == 0)
        {
            return "";
        }
        return companynamelist[companynamelist.Count - 1].secFullName;
    }
Example #13
0
    /// <summary>
    /// Extracts the shareholder's full name from text between a start word
    /// ("接到", "收到", "股东") and an end word ("的", "通知", "告知函", "减持", "增持", "《").
    /// </summary>
    /// <param name="root">Root HTML node handed to the extractor.</param>
    /// <returns>
    /// The first candidate containing "简称"; otherwise the first candidate; otherwise
    /// an empty string.
    /// </returns>
    static string GetHolderFullName(HTMLEngine.MyRootHtmlNode root)
    {
        var extractor = new EntityProperty
        {
            StartEndFeature = Utility.GetStartEndStringArray(
                new string[] { "接到", "收到", "股东" },
                new string[] { "的", "通知", "告知函", "减持", "增持", "《" })
        };
        extractor.Extract(root);

        // A candidate that mentions "简称" is preferred over position in the list.
        foreach (var candidate in extractor.CandidateWord)
        {
            if (!candidate.Contains("简称"))
            {
                continue;
            }
            Program.Logger.WriteLine("候补股东全称修正:[" + candidate + "]");
            return candidate;
        }

        if (extractor.CandidateWord.Count > 0)
        {
            return extractor.CandidateWord[0];
        }
        return "";
    }
Example #14
0
    /// <summary>
    /// Determines party A (甲方) of the contract for the current document.
    /// </summary>
    /// <remarks>
    /// Sources are consulted in this order: (1) leading-colon keyword candidates, the
    /// first of which is returned immediately; (2) the result of <c>SearchJiaFang()</c>,
    /// mapped from short name to full name through <c>companynamelist</c>;
    /// (3) start/end string patterns around bidding and contract-signing phrases.
    /// </remarks>
    /// <param name="YiFang">
    /// Party B (乙方). May be null or empty; a NER result equal to this value is not
    /// accepted as party A.
    /// </param>
    /// <returns>
    /// The first colon-keyword candidate; otherwise a pattern candidate ending with the
    /// NER result, or the NER result itself; otherwise
    /// <c>CompanyNameLogic.MostLikeCompanyName</c> over the pattern candidates.
    /// </returns>
    string GetJiaFang(String YiFang)
    {
        // Highest-confidence extraction: explicit "label:" keywords.
        EntityProperty e = new EntityProperty();

        e.ExcludeContainsWordList = new string[] { "招标代理" };
        e.LeadingColonKeyWordList = new string[] {
            "甲方:", "合同买方:",
            "发包人:", "发包单位:", "发包方:", "发包机构:", "发包人名称:",
            "招标人:", "招标单位:", "招标方:", "招标机构:", "招标人名称:", "项目招标人:",
            "业主:", "业主单位:", "业主方:", "业主机构:", "业主名称:",
            "采购单位:", "采购单位名称:", "采购人:", "采购人名称:", "采购方:", "采购方名称:"
        };
        e.CandidatePreprocess = (x =>
        {
            x = Normalizer.ClearTrailing(x);
            return(CompanyNameLogic.AfterProcessFullName(x).secFullName);
        });
        e.MaxLength = 32;
        e.MaxLengthCheckPreprocess = Utility.TrimEnglish;
        e.MinLength = 3;
        e.Extract(this);
        // Distinct is not applied to the list itself: per the original author, the more
        // often a candidate occurs, the more credible it is.
        // Several distinct candidates may mean there is actually no party A, so log them.
        if (e.LeadingColonKeyWordCandidate.Distinct().Count() > 1)
        {
            foreach (var candidate in e.LeadingColonKeyWordCandidate)
            {
                Program.Logger.WriteLine("发现多个甲方:" + candidate);
            }
        }
        if (e.LeadingColonKeyWordCandidate.Count > 0)
        {
            return(e.LeadingColonKeyWordCandidate[0]);
        }

        var ner    = SearchJiaFang();
        var NerJia = String.Empty;

        if (!String.IsNullOrEmpty(ner))
        {
            // Replace a known short name with its full name.
            foreach (var cn in companynamelist)
            {
                if (cn.secShortName == ner)
                {
                    ner = cn.secFullName;
                }
            }
            // Accept the NER result unless it equals party B. The null/empty check and
            // the comparison are combined so that a null YiFang is never dereferenced
            // (previously YiFang.Equals ran unconditionally and threw on null).
            if (String.IsNullOrEmpty(YiFang) || !YiFang.Equals(ner))
            {
                NerJia = ner;
            }
        }

        // Bidding patterns.
        var Extractor     = new ExtractPropertyByHTML();
        var CandidateWord = new List <String>();
        var StartArray    = new string[] { "招标单位", "业主", "收到", "接到" };
        var EndArray      = new string[] { "发来", "发出", "的中标" };

        Extractor.StartEndFeature = Utility.GetStartEndStringArray(StartArray, EndArray);
        Extractor.Extract(root);
        foreach (var item in Extractor.CandidateWord)
        {
            var JiaFang = CompanyNameLogic.AfterProcessFullName(item.Value.Trim());
            if (JiaFang.secFullName.Contains("招标代理"))
            {
                continue;                                       // special business rule
            }
            JiaFang.secFullName = JiaFang.secFullName.Replace("业主", String.Empty).Trim();
            JiaFang.secFullName = JiaFang.secFullName.Replace("招标单位", String.Empty).Trim();
            if (Utility.TrimEnglish(JiaFang.secFullName).Length > 32)
            {
                continue;
            }
            if (JiaFang.secFullName.Length < 3)
            {
                // Checked on the raw length (not the TrimEnglish length) to account for
                // all-English names.
                continue;
            }
            if (!Program.IsMultiThreadMode)
            {
                Program.Logger.WriteLine("甲方候补词(招标):[" + JiaFang.secFullName + "]");
            }
            CandidateWord.Add(JiaFang.secFullName);
        }

        // Contract-signing patterns.
        Extractor  = new ExtractPropertyByHTML();
        StartArray = new string[] { "与", "与业主" };
        EndArray   = new string[] { "签署", "签订" };
        Extractor.StartEndFeature = Utility.GetStartEndStringArray(StartArray, EndArray);
        Extractor.Extract(root);
        foreach (var item in Extractor.CandidateWord)
        {
            var JiaFang = CompanyNameLogic.AfterProcessFullName(item.Value.Trim());
            JiaFang.secFullName = JiaFang.secFullName.Replace("业主", String.Empty).Trim();
            if (JiaFang.secFullName.Contains("招标代理"))
            {
                continue;                                       // special business rule
            }
            if (Utility.TrimEnglish(JiaFang.secFullName).Length > 32)
            {
                continue;
            }
            if (JiaFang.secFullName.Length < 3)
            {
                // Checked on the raw length (not the TrimEnglish length) to account for
                // all-English names.
                continue;
            }
            if (!Program.IsMultiThreadMode)
            {
                Program.Logger.WriteLine("甲方候补词(合同):[" + JiaFang.secFullName + "]");
            }
            CandidateWord.Add(JiaFang.secFullName);
        }
        if (!String.IsNullOrEmpty(NerJia))
        {
            // In principle the NER result wins when there is one.
            foreach (var c in CandidateWord)
            {
                // A pattern candidate may still be the better answer, e.g. NER yields
                // "(集团)有限公司" while the real name is "XXXX(集团)有限公司".
                if (c.EndsWith(NerJia))
                {
                    return(c);
                }
            }
            return(NerJia);
        }
        else
        {
            return(CompanyNameLogic.MostLikeCompanyName(CandidateWord));
        }
    }
Example #15
0
    /// <summary>
    /// Determines party A (甲方) from <paramref name="root"/>: labelled text first, then
    /// bidding phrases, then contract-signing phrases. The first acceptable candidate
    /// of the first pass that yields one is returned.
    /// </summary>
    /// <param name="root">Root HTML node handed to the extractors.</param>
    /// <returns>The accepted name, or an empty string when no pass yields one.</returns>
    static string GetJiaFang(MyRootHtmlNode root)
    {
        // Pass 1: text following an explicit label.
        var byLabel = new EntityProperty
        {
            LeadingWordList = new string[] {
                "甲方:",
                "发包人:", "发包单位:", "发包方:", "发包机构:", "发包人名称:",
                "招标人:", "招标单位:", "招标方:", "招标机构:", "招标人名称:",
                "业主:", "业主单位:", "业主方:", "业主机构:", "业主名称:",
                "采购单位:", "采购人:", "采购人名称:", "采购方:"
            }
        };
        byLabel.Extract(root);
        foreach (var candidate in byLabel.CandidateWord)
        {
            var name = AfterProcessJiaFang(candidate.Trim());
            // The minimum is checked on the raw length (not the TrimEnglish length) to
            // account for all-English names.
            if (EntityWordAnlayzeTool.TrimEnglish(name).Length > ContractTraning.MaxJiaFangLength || name.Length < 3)
            {
                continue;
            }
            Program.Logger.WriteLine("甲方候补词(关键字):[" + name + "]");
            return name;
        }

        // Pass 2: bidding phrases.
        var byBidding = new EntityProperty
        {
            StartEndFeature = Utility.GetStartEndStringArray(
                new string[] { "招标单位", "业主", "收到", "接到" },
                new string[] { "发来", "发出", "的中标" })
        };
        byBidding.Extract(root);
        foreach (var candidate in byBidding.CandidateWord)
        {
            var name = AfterProcessJiaFang(candidate.Trim()).Replace("业主", "").Trim();
            if (EntityWordAnlayzeTool.TrimEnglish(name).Length > ContractTraning.MaxJiaFangLength || name.Length < 3)
            {
                continue;
            }
            Program.Logger.WriteLine("甲方候补词(招标):[" + name + "]");
            return name;
        }

        // Pass 3: contract-signing phrases.
        var byContract = new EntityProperty
        {
            StartEndFeature = Utility.GetStartEndStringArray(
                new string[] { "与", "与业主" },
                new string[] { "签署", "签订" })
        };
        byContract.Extract(root);
        foreach (var candidate in byContract.CandidateWord)
        {
            var name = AfterProcessJiaFang(candidate.Trim()).Replace("业主", "").Trim();
            if (EntityWordAnlayzeTool.TrimEnglish(name).Length > ContractTraning.MaxJiaFangLength || name.Length < 3)
            {
                continue;
            }
            Program.Logger.WriteLine("甲方候补词(合同):[" + name + "]");
            return name;
        }
        return "";
    }
Example #16
0
    /// <summary>
    /// Extracts a contract name from <paramref name="root"/>: 《…》 titles ending in
    /// "合同" or "确认书" first, then text after "合同名称:", then text between
    /// "签署了" and "合同".
    /// </summary>
    /// <param name="root">Root HTML node handed to the extractors.</param>
    /// <returns>The first accepted candidate (trimmed), or an empty string.</returns>
    static string GetContractName(MyRootHtmlNode root)
    {
        // Pass 1: book-title marks whose inner text ends with "合同" or "确认书".
        var byTitleMark = new EntityProperty();
        byTitleMark.MarkFeature = new EntityProperty.struMarkFeature[]
        {
            new EntityProperty.struMarkFeature { MarkStartWith = "《", MarkEndWith = "》", InnerEndWith = "合同" },
            new EntityProperty.struMarkFeature { MarkStartWith = "《", MarkEndWith = "》", InnerEndWith = "确认书" },
        };
        byTitleMark.Extract(root);
        foreach (var candidate in byTitleMark.CandidateWord)
        {
            var name = candidate.Trim();
            // Candidates whose TrimEnglish length exceeds the configured maximum are skipped.
            if (EntityWordAnlayzeTool.TrimEnglish(name).Length <= ContractTraning.MaxContractNameLength)
            {
                Program.Logger.WriteLine("合同名称候补词(《XXX》):[" + candidate + "]");
                return name;
            }
        }

        // Pass 2: text following the label.
        var byLabel = new EntityProperty();
        byLabel.LeadingWordList = new string[] { "合同名称:" };
        byLabel.Extract(root);
        foreach (var candidate in byLabel.CandidateWord)
        {
            var name = candidate.Trim();
            if (EntityWordAnlayzeTool.TrimEnglish(name).Length <= ContractTraning.MaxContractNameLength)
            {
                Program.Logger.WriteLine("合同名称候补词(关键字):[" + candidate + "]");
                return name;
            }
        }

        // Pass 3: text between "签署了" and "合同".
        var bySigning = new EntityProperty();
        bySigning.StartEndFeature = Utility.GetStartEndStringArray(
            new string[] { "签署了" },
            new string[] { "合同" });
        bySigning.Extract(root);
        foreach (var candidate in bySigning.CandidateWord)
        {
            var name = candidate.Trim();
            if (EntityWordAnlayzeTool.TrimEnglish(name).Length <= ContractTraning.MaxContractNameLength)
            {
                Program.Logger.WriteLine("合同候补词(合同):[" + candidate + "]");
                return name;
            }
        }
        return "";
    }
Example #17
0
    /// <summary>
    /// Determines party A (甲方) of the contract for the current document.
    /// </summary>
    /// <returns>
    /// The first leading-colon keyword candidate when there is one; otherwise
    /// <c>CompanyNameLogic.MostLikeCompanyName</c> over the candidates gathered from the
    /// bidding and contract-signing patterns.
    /// </returns>
    public string GetJiaFang()
    {
        // Highest-confidence extraction: explicit "label:" keywords.
        var byLabel = new EntityProperty
        {
            ExcludeContainsWordList = new string[] { "招标代理" },
            LeadingColonKeyWordList = new string[] {
                "甲方:", "合同买方:",
                "发包人:", "发包单位:", "发包方:", "发包机构:", "发包人名称:",
                "招标人:", "招标单位:", "招标方:", "招标机构:", "招标人名称:",
                "业主:", "业主单位:", "业主方:", "业主机构:", "业主名称:",
                "采购单位:", "采购单位名称:", "采购人:", "采购人名称:", "采购方:", "采购方名称:"
            },
            CandidatePreprocess =
                x => CompanyNameLogic.AfterProcessFullName(Normalizer.ClearTrailing(x)).secFullName,
            MaxLength = ContractTraning.JiaFangES.MaxLength,
            MaxLengthCheckPreprocess = Utility.TrimEnglish,
            MinLength = 3,
        };
        byLabel.Extract(this);

        // Distinct is not applied to the list itself: per the original author, the more
        // often a candidate occurs, the more credible it is. Several distinct candidates
        // may mean there is actually no party A, so they are logged.
        if (byLabel.LeadingColonKeyWordCandidate.Distinct().Count() > 1)
        {
            foreach (var labelled in byLabel.LeadingColonKeyWordCandidate)
            {
                Program.Logger.WriteLine("发现多个甲方:" + labelled);
            }
        }
        if (byLabel.LeadingColonKeyWordCandidate.Count > 0)
        {
            return byLabel.LeadingColonKeyWordCandidate[0];
        }

        var collected = new List<String>();

        // Bidding patterns.
        var byBidding = new ExtractPropertyByHTML();
        byBidding.StartEndFeature = Utility.GetStartEndStringArray(
            new string[] { "招标单位", "业主", "收到", "接到" },
            new string[] { "发来", "发出", "的中标" });
        byBidding.Extract(root);
        foreach (var hit in byBidding.CandidateWord)
        {
            var company = CompanyNameLogic.AfterProcessFullName(hit.Value.Trim());
            // Special business rule: the agency check here runs before the role words
            // are stripped.
            if (company.secFullName.Contains("招标代理"))
            {
                continue;
            }
            company.secFullName = company.secFullName
                                  .Replace("业主", String.Empty).Trim()
                                  .Replace("招标单位", String.Empty).Trim();
            // The minimum is checked on the raw length (not the TrimEnglish length) to
            // account for all-English names.
            if (Utility.TrimEnglish(company.secFullName).Length > ContractTraning.JiaFangES.MaxLength
                || company.secFullName.Length < 3)
            {
                continue;
            }
            if (!Program.IsMultiThreadMode)
            {
                Program.Logger.WriteLine("甲方候补词(招标):[" + company.secFullName + "]");
            }
            collected.Add(company.secFullName);
        }

        // Contract-signing patterns.
        var byContract = new ExtractPropertyByHTML();
        byContract.StartEndFeature = Utility.GetStartEndStringArray(
            new string[] { "与", "与业主" },
            new string[] { "签署", "签订" });
        byContract.Extract(root);
        foreach (var hit in byContract.CandidateWord)
        {
            var company = CompanyNameLogic.AfterProcessFullName(hit.Value.Trim());
            company.secFullName = company.secFullName.Replace("业主", String.Empty).Trim();
            // Special business rule: here the agency check runs after "业主" is stripped.
            if (company.secFullName.Contains("招标代理"))
            {
                continue;
            }
            if (Utility.TrimEnglish(company.secFullName).Length > ContractTraning.JiaFangES.MaxLength
                || company.secFullName.Length < 3)
            {
                continue;
            }
            if (!Program.IsMultiThreadMode)
            {
                Program.Logger.WriteLine("甲方候补词(合同):[" + company.secFullName + "]");
            }
            collected.Add(company.secFullName);
        }
        return CompanyNameLogic.MostLikeCompanyName(collected);
    }
Example #18
0
    /// <summary>
    /// Extracts the contract name ("合同名称") for the current document and, when a
    /// labelled training record exists for this document id, checks whether the
    /// candidates contain the labelled name.
    /// </summary>
    /// <returns>The value returned by <c>EvaluateCI()</c> on the configured extractor.</returns>
    string GetContractName()
    {
        var e = new EntityProperty();

        e.PropertyName = "合同名称";
        e.PropertyType = EntityProperty.enmType.NER;
        e.MaxLength    = ContractTraning.MaxContractNameLength;
        e.MinLength    = 5;

        /* Training-mode alternative left by the original author (threshold: 40% and above):
         * e.LeadingColonKeyWordList = ContractTraning.ContractNameLeadingDict
         *                          .Where((x) => { return x.Value >= 40; })
         *                          .Select((x) => { return x.Key + ":"; }).ToArray();
         */
        e.LeadingColonKeyWordList   = new string[] { "合同名称:" };
        e.QuotationTrailingWordList = new string[] { "协议书", "合同书", "确认书", "合同", "协议" };
        e.QuotationTrailingWordList_IsSkipBracket = true;   // only true is supported for now
        var KeyList = new List <ExtractPropertyByDP.DPKeyWord>();

        KeyList.Add(new ExtractPropertyByDP.DPKeyWord()
        {
            StartWord    = new string[] { "签署", "签订" }, // obtained from SRL training
            StartDPValue = new string[] { LTPTrainingDP.核心关系, LTPTrainingDP.定中关系, LTPTrainingDP.并列关系 },
            EndWord      = new string[] { "补充协议", "合同书", "合同", "协议书", "协议", },
            EndDPValue   = new string[] { LTPTrainingDP.核心关系, LTPTrainingDP.定中关系, LTPTrainingDP.并列关系, LTPTrainingDP.动宾关系, LTPTrainingDP.主谓关系 }
        });
        e.DpKeyWordList = KeyList;

        var StartArray = new string[] { "签署了", "签订了" };   // obtained from context training
        var EndArray   = new string[] { "合同" };

        e.ExternalStartEndStringFeature = Utility.GetStartEndStringArray(StartArray, EndArray);
        // The end word "合同" is appended back onto each start/end candidate.
        e.ExternalStartEndStringFeatureCandidatePreprocess = (x) => { return(x + "合同"); };
        e.MaxLengthCheckPreprocess = str =>
        {
            return(EntityWordAnlayzeTool.TrimEnglish(str));
        };
        // Highest confidence level: dedicated preprocessor for colon-keyword candidates.
        e.LeadingColonKeyWordCandidatePreprocess = str =>
        {
            var c = Normalizer.ClearTrailing(TrimJianCheng(str));
            return(c);
        };

        e.CandidatePreprocess = str =>
        {
            var c             = Normalizer.ClearTrailing(TrimJianCheng(str));
            var RightQMarkIdx = c.IndexOf("”");
            if (!(RightQMarkIdx != -1 && RightQMarkIdx != c.Length - 1))
            {
                // For names like “XXX”合同 the closing quote exists but is not last;
                // only in the other cases is the opening quote stripped.
                c = c.TrimStart("“".ToCharArray());
            }
            c = c.TrimStart("《".ToCharArray());
            c = c.TrimEnd("》".ToCharArray()).TrimEnd("”".ToCharArray());
            return(c);
        };
        e.ExcludeContainsWordList = new string[] { "日常经营重大合同" };
        // Original author's note: the list below is weakly justified.
        e.ExcludeEqualsWordList = new string[] { "合同", "重大合同", "项目合同", "终止协议", "经营合同", "特别重大合同", "相关项目合同" };
        e.Extract(this);

        // Check whether the candidates contain the labelled answer (per the original
        // author this cannot be used on the test set). The query is materialized once
        // with Take(1): the original ran the deferred Where twice (Count() then First()).
        var matched = TraningDataset.ContractList
                      .Where((x) => { return(x.id == this.Id); })
                      .Take(1)
                      .ToList();

        if (matched.Count > 0)
        {
            var contractname = matched[0].ContractName;
            if (!String.IsNullOrEmpty(contractname))
            {
                e.CheckIsCandidateContainsTarget(contractname);
            }
        }
        // Confidence settings used by EvaluateCI.
        e.Confidence = ContractTraning.ContractES.GetStardardCI();
        return(e.EvaluateCI());
    }