/// <summary>
/// Gets the project name (工程名称) for this document.
/// </summary>
/// <remarks>
/// Sources are tried in order and the first non-empty result is returned:
/// 1. an <c>EntityProperty</c> extraction configured with colon-led keywords and quotation trailing words;
/// 2. the same extractor run again after start/end string features are added; "项目" is appended to the
///    result when <c>ExtractPropertyByHTML.FindWordCnt</c> reports at least one hit for the suffixed form;
/// 3. the result of <c>Utility.GetStringBefore</c> for the first quotation that contains
///    "推荐的中标候选人公示" (presumably the text preceding that phrase; confirm against the helper).
/// </remarks>
/// <returns>The project name, or <see cref="string.Empty"/> when no source yields one.</returns>
string GetProjectName()
{
    var extractor = new EntityProperty
    {
        PropertyName = "工程名称",
        LeadingColonKeyWordList = new[] { "项目名称:", "工程名称:", "中标项目:", "合同标的:", "工程内容:" },
        LeadingColonKeyWordCandidatePreprocess = TrimEndJianCheng,
        QuotationTrailingWordList_IsSkipBracket = true,
        QuotationTrailingWordList = new[]
        {
            "标段施工项目", "标段土建工程", "标段施工总承包", "标段的工程", "标段工程",
            "标段施工总价承包", "标段施工总承包工程", "标段施工工程", "标段土建工程建设项目",
            "标段站前工程", "标段工程(施工)", "工程施工工程", "项目施工工程", "施工工程",
            "工程项目", "工程标段", "标段的施工项目", "标段项目", "标段施工", "招标采购项目",
            "招标活动", "采购活动", "招标项目", "项目", "采购", "总承包", "工程", "标段", "标",
        },
    };

    // First pass: colon-led keywords and quotation trailing words only.
    extractor.Extract(this);
    var projectName = extractor.EvaluateCI();
    if (!string.IsNullOrEmpty(projectName))
    {
        return projectName;
    }

    // Second pass: add the start/end string features and extract again.
    extractor.ExternalStartEndStringFeature = Utility.GetStartEndStringArray(
        new[] { "公司为", "参与了", "确定为" },
        new[] { "的中标单位", "的公开招投标", "的中标人", "候选人" });
    extractor.Extract(this);
    projectName = extractor.EvaluateCI();
    if (!string.IsNullOrEmpty(projectName))
    {
        // Prefer the "...项目" form when it is found at least once under root.
        var withSuffix = projectName + "项目";
        return ExtractPropertyByHTML.FindWordCnt(withSuffix, root).Count >= 1
            ? withSuffix
            : projectName;
    }

    // Last resort: use the first quotation that contains the announcement marker.
    const string announcementMarker = "推荐的中标候选人公示";
    foreach (var quotation in quotationList)
    {
        if (!quotation.Value.Contains(announcementMarker))
        {
            continue;
        }

        return Utility.GetStringBefore(quotation.Value, announcementMarker);
    }

    return string.Empty;
}
/// <summary>
/// Gets the project name (工程名称) for this document.
/// </summary>
/// <remarks>
/// A single <c>EntityProperty</c> extraction is run with colon-led keywords, quotation trailing words
/// and start/end string features. When it yields nothing, the first quotation containing
/// "推荐的中标候选人公示" is passed to <c>Utility.GetStringBefore</c> and that result is returned.
/// </remarks>
/// <returns>The project name, or <see cref="string.Empty"/> when no source yields one.</returns>
string GetProjectName()
{
    var extractor = new EntityProperty
    {
        PropertyName = "工程名称",
        LeadingColonKeyWordList = new[] { "项目名称:", "工程名称:", "中标项目:", "合同标的:", "工程内容:" },
        LeadingColonKeyWordCandidatePreprocess = TrimEndJianCheng,
        QuotationTrailingWordList = new[] { "工程", "标段", "标", "招标活动", "项目", "采购" },
        ExternalStartEndStringFeature = Utility.GetStartEndStringArray(
            new[] { "公司为", "参与了", "确定为" },
            new[] { "的中标单位", "的公开招投标", "的中标人", "候选人" }),
    };

    extractor.Extract(this);
    var projectName = extractor.EvaluateCI();
    if (!string.IsNullOrEmpty(projectName))
    {
        return projectName;
    }

    // Fallback: use the first quotation that contains the announcement marker.
    const string announcementMarker = "推荐的中标候选人公示";
    foreach (var quotation in quotationList)
    {
        if (!quotation.Value.Contains(announcementMarker))
        {
            continue;
        }

        return Utility.GetStringBefore(quotation.Value, announcementMarker);
    }

    return string.Empty;
}
/// <summary>
/// Gets the contract name (合同名称) for this document.
/// </summary>
/// <remarks>
/// Configures an <c>EntityProperty</c> with colon-led keywords, quotation trailing words,
/// dependency-parsing keywords, start/end string features, candidate preprocessors and exclusion
/// lists, then runs the extraction once.
/// </remarks>
/// <returns>The value produced by <c>EvaluateCI()</c> after the extraction.</returns>
string GetContractName()
{
    var extractor = new EntityProperty
    {
        PropertyName = "合同名称",
        PropertyType = EntityProperty.enmType.NER,
        MaxLength = ContractTraning.ContractES.MaxLength,
        MinLength = ContractTraning.ContractES.MinLength,
        LeadingColonKeyWordList = new[] { "合同名称:" },
        QuotationTrailingWordList = new[] { "协议书", "合同书", "确认书", "合同", "协议" },
        // For now this can only be set to true.
        QuotationTrailingWordList_IsSkipBracket = true,
        DpKeyWordList = new List<ExtractPropertyByDP.DPKeyWord>
        {
            new ExtractPropertyByDP.DPKeyWord
            {
                // Obtained through SRL training.
                StartWord = new[] { "签署", "签订" },
                StartDPValue = new string[]
                {
                    LTPTrainingDP.核心关系, LTPTrainingDP.定中关系, LTPTrainingDP.并列关系,
                },
                EndWord = new[] { "补充协议", "合同书", "合同", "协议书", "协议" },
                EndDPValue = new string[]
                {
                    LTPTrainingDP.核心关系, LTPTrainingDP.定中关系, LTPTrainingDP.并列关系,
                    LTPTrainingDP.动宾关系, LTPTrainingDP.主谓关系,
                },
            },
        },
        // Obtained through context training.
        ExternalStartEndStringFeature = Utility.GetStartEndStringArray(
            new[] { "签署了", "签订了" },
            new[] { "合同" }),
        // Appends "合同" to every start/end feature candidate.
        ExternalStartEndStringFeatureCandidatePreprocess = candidate => candidate + "合同",
        MaxLengthCheckPreprocess = text => Utility.TrimEnglish(text),
        // Highest confidence level; gets its own dedicated preprocessor.
        LeadingColonKeyWordCandidatePreprocess = text => Normalizer.ClearTrailing(TrimEndJianCheng(text)),
    };

    extractor.CandidatePreprocess = candidate =>
    {
        var cleaned = Normalizer.ClearTrailing(TrimEndJianCheng(candidate));
        var closingQuoteAt = cleaned.IndexOf("”");

        // Strip a leading “ only when there is no ” at all or the ” is the last character.
        // Per the original note, names like “XXX”合同 (closing quote not at the end) skip this step.
        if (closingQuoteAt == -1 || closingQuoteAt == cleaned.Length - 1)
        {
            cleaned = cleaned.TrimStart("“".ToCharArray());
        }

        // The two TrimEnd calls are applied in sequence: first 》, then ”.
        return cleaned
            .TrimStart("《".ToCharArray())
            .TrimEnd("》".ToCharArray())
            .TrimEnd("”".ToCharArray());
    };

    extractor.ExcludeContainsWordList = new[] { "日常经营重大合同" };

    // The original note calls the list below weakly justified and names 【尚未签署】
    // ("not yet signed") as the proper approach.
    extractor.ExcludeEqualsWordList = new[]
    {
        "若干项重大合同", "中标合同", "正式合同", "合同", "重大合同", "项目合同", "终止协议",
        "经营合同", "特别重大合同", "相关项目合同",
    };

    extractor.Extract(this);

    // Colon-led keywords take priority.
    return extractor.EvaluateCI();
}
/// <summary>
/// Gets the contract name (合同名称) for this document.
/// </summary>
/// <remarks>
/// Configures an <c>EntityProperty</c> with literal length bounds (4 to 200), colon-led keywords,
/// quotation trailing words, start/end string features, candidate preprocessors and exclusion
/// lists, then runs the extraction once.
/// </remarks>
/// <returns>The value produced by <c>EvaluateCI()</c> after the extraction.</returns>
string GetContractName()
{
    var extractor = new EntityProperty
    {
        PropertyName = "合同名称",
        PropertyType = EntityProperty.enmType.NER,
        MaxLength = 200,
        MinLength = 4,
        LeadingColonKeyWordList = new[] { "合同名称:" },
        // NOTE(review): "经营合同补充协议" is listed after its own suffix "补充协议"; if matching
        // is first-hit in list order it can never win. Confirm against EntityProperty.
        QuotationTrailingWordList = new[]
        {
            "商务合同补充协议", "承包合同补充协议", "补充协议", "经营合同补充协议",
            "协议书", "合同书", "确认书", "合同", "协议",
        },
        QuotationTrailingWordList_IsSkipBracket = false,
        // Obtained through context training.
        ExternalStartEndStringFeature = Utility.GetStartEndStringArray(
            new[] { "签署了", "签订了" },
            new[] { "合同" }),
        // Appends "合同" to every start/end feature candidate.
        ExternalStartEndStringFeatureCandidatePreprocess = candidate => candidate + "合同",
        MaxLengthCheckPreprocess = text => Utility.TrimEnglish(text),
        // Highest confidence level; gets its own dedicated preprocessor.
        LeadingColonKeyWordCandidatePreprocess = text => Normalizer.ClearTrailing(TrimEndJianCheng(text)),
    };

    extractor.CandidatePreprocess = candidate =>
    {
        var cleaned = Normalizer.ClearTrailing(TrimEndJianCheng(candidate));
        var closingQuoteAt = cleaned.IndexOf("”");

        // Strip a leading “ only when there is no ” at all or the ” is the last character.
        // Per the original note, names like “XXX”合同 (closing quote not at the end) skip this step.
        if (closingQuoteAt == -1 || closingQuoteAt == cleaned.Length - 1)
        {
            cleaned = cleaned.TrimStart("“".ToCharArray());
        }

        // The two TrimEnd calls are applied in sequence: first 》, then ”.
        return cleaned
            .TrimStart("《".ToCharArray())
            .TrimEnd("》".ToCharArray())
            .TrimEnd("”".ToCharArray());
    };

    extractor.ExcludeContainsWordList = new[] { "日常经营重大合同" };

    // The original note calls the list below weakly justified and names 【尚未签署】
    // ("not yet signed") as the proper approach.
    extractor.ExcludeEqualsWordList = new[]
    {
        "若干项重大合同", "中标合同", "正式合同", "合同", "重大合同", "项目合同", "终止协议",
        "经营合同", "特别重大合同", "相关项目合同",
    };

    extractor.Extract(this);

    // Colon-led keywords take priority.
    return extractor.EvaluateCI();
}
/// <summary>
/// Gets the contract name (合同名称) for this document.
/// </summary>
/// <remarks>
/// Configures an <c>EntityProperty</c> with colon-led keywords, quotation trailing words,
/// dependency-parsing keywords, start/end string features, candidate preprocessors and exclusion
/// lists, and runs the extraction. It then looks up this document's entry in
/// <c>TraningDataset.ContractList</c> and, when that entry has a non-empty contract name, passes it to
/// <c>CheckIsCandidateContainsTarget</c>. Finally the confidence is set from
/// <c>ContractTraning.ContractES.GetStardardCI()</c> and the result is evaluated.
/// </remarks>
/// <returns>The value produced by <c>EvaluateCI()</c> after the extraction.</returns>
string GetContractName()
{
    var e = new EntityProperty();
    e.PropertyName = "合同名称";
    e.PropertyType = EntityProperty.enmType.NER;
    e.MaxLength = ContractTraning.MaxContractNameLength;
    e.MinLength = 5;

    // In training mode the leading keywords were instead derived from
    // ContractTraning.ContractNameLeadingDict: entries whose value is >= 40 (a 40% threshold
    // per the original note), each key suffixed with ":".
    e.LeadingColonKeyWordList = new string[] { "合同名称:" };
    e.QuotationTrailingWordList = new string[] { "协议书", "合同书", "确认书", "合同", "协议" };
    // For now this can only be set to true.
    e.QuotationTrailingWordList_IsSkipBracket = true;

    e.DpKeyWordList = new List<ExtractPropertyByDP.DPKeyWord>
    {
        new ExtractPropertyByDP.DPKeyWord()
        {
            // Obtained through SRL training.
            StartWord = new string[] { "签署", "签订" },
            StartDPValue = new string[]
            {
                LTPTrainingDP.核心关系, LTPTrainingDP.定中关系, LTPTrainingDP.并列关系,
            },
            EndWord = new string[] { "补充协议", "合同书", "合同", "协议书", "协议" },
            EndDPValue = new string[]
            {
                LTPTrainingDP.核心关系, LTPTrainingDP.定中关系, LTPTrainingDP.并列关系,
                LTPTrainingDP.动宾关系, LTPTrainingDP.主谓关系,
            },
        },
    };

    // Obtained through context training.
    var StartArray = new string[] { "签署了", "签订了" };
    var EndArray = new string[] { "合同" };
    e.ExternalStartEndStringFeature = Utility.GetStartEndStringArray(StartArray, EndArray);
    // Appends "合同" to every start/end feature candidate.
    e.ExternalStartEndStringFeatureCandidatePreprocess = x => x + "合同";
    e.MaxLengthCheckPreprocess = str => EntityWordAnlayzeTool.TrimEnglish(str);

    // Highest confidence level; gets its own dedicated preprocessor.
    e.LeadingColonKeyWordCandidatePreprocess = str => Normalizer.ClearTrailing(TrimJianCheng(str));

    e.CandidatePreprocess = str =>
    {
        var c = Normalizer.ClearTrailing(TrimJianCheng(str));
        var RightQMarkIdx = c.IndexOf("”");
        if (!(RightQMarkIdx != -1 && RightQMarkIdx != c.Length - 1))
        {
            // Per the original note, names like “XXX”合同 (a right quote that is not the last
            // character) skip this step; otherwise the leading “ is stripped.
            c = c.TrimStart("“".ToCharArray());
        }
        c = c.TrimStart("《".ToCharArray());
        // The two TrimEnd calls are applied in sequence: first 》, then ”.
        c = c.TrimEnd("》".ToCharArray()).TrimEnd("”".ToCharArray());
        return c;
    };

    e.ExcludeContainsWordList = new string[] { "日常经营重大合同" };
    // The original note calls the list below weakly justified.
    e.ExcludeEqualsWordList = new string[]
    {
        "合同", "重大合同", "项目合同", "终止协议", "经营合同", "特别重大合同", "相关项目合同",
    };

    e.Extract(this);

    // Check whether the candidates contain the labelled answer (not usable on the test set).
    // The lookup is a single pass that stops at the first entry with a matching id; the previous
    // Count() > 0 followed by First() ran the deferred query twice.
    var expectedName = TraningDataset.ContractList
        .Where(x => x.id == this.Id)
        .Select(x => x.ContractName)
        .FirstOrDefault();
    if (!String.IsNullOrEmpty(expectedName))
    {
        e.CheckIsCandidateContainsTarget(expectedName);
    }

    // Confidence.
    e.Confidence = ContractTraning.ContractES.GetStardardCI();
    return e.EvaluateCI();
}