/// <summary>
/// Loads the reference contract records from the file at <c>ContractPath_TRAIN</c>
/// into <c>ContractList</c> (one record per line) and prints the resulting count.
/// </summary>
public static void InitContract()
{
    // The using statement releases the file handle even when a line fails to parse.
    using (var sr = new StreamReader(ContractPath_TRAIN))
    {
        string line;
        while ((line = sr.ReadLine()) != null)
        {
            // An empty line carries no tab-separated fields and cannot be converted.
            if (line.Length == 0)
            {
                continue;
            }
            ContractList.Add(ContractRec.ConvertFromString(line));
        }
    }
    Console.WriteLine("合同标准结果数:" + ContractList.Count);
}
/// <summary>
/// Extracts several contract records from one announcement.
/// Sentences are scanned in order; once a sentence matches the pattern
/// "中标通知书\d份" or contains "履行进展情况", every following sentence whose
/// NER entry holds exactly one money amount produces a record. Party A and
/// Party B of each record are copied from the single-contract extraction.
/// </summary>
/// <returns>The records found; empty when no trigger sentence was seen.</returns>
List<RecordBase> ExtractMultiCommon()
{
    var single = ExtractSingle();

    // Phrases this scan targets: "three orders", "6 bid-award notices",
    // "four bid-award notices", "performance progress".
    var found = new List<RecordBase>();
    var triggered = false;

    foreach (var paragraph in root.Children)
    {
        foreach (var sentence in paragraph.Children)
        {
            if (!triggered)
            {
                // Still looking for the sentence that announces a multi-contract section.
                // NOTE(review): ConvertUpperToLower presumably turns Chinese numerals into digits - confirm.
                var compact = NumberUtility.ConvertUpperToLower(sentence.Content).Replace(" ", "");
                var noticeHits = RegularTool.GetRegular(compact, "中标通知书\\d份");
                if (noticeHits.Count == 1)
                {
                    Console.WriteLine(Id + ":" + noticeHits[0].RawData + "[" + compact + "]");
                    triggered = true;
                }
                if (sentence.Content.Contains("履行进展情况"))
                {
                    Console.WriteLine(Id + ":履行进展情况");
                    triggered = true;
                }
                continue;
            }

            if (!nermap.ParagraghlocateDict.TryGetValue(sentence.PositionId, out var ner))
            {
                continue;
            }
            // Only sentences with exactly one amount are taken as a contract line.
            if (ner.moneylist.Count != 1)
            {
                continue;
            }

            var rec = new ContractRec();
            rec.Id = Id;
            rec.JiaFang = single.JiaFang;
            rec.YiFang = single.YiFang;
            rec.ContractMoneyUpLimit = MoneyUtility.Format(ner.moneylist.First().Value.MoneyAmount, String.Empty);
            rec.ContractMoneyDownLimit = rec.ContractMoneyUpLimit;
            found.Add(rec);
        }
    }

    return found;
}
/// <summary>
/// Builds a <c>ContractRec</c> from one tab-separated line.
/// Column order: Id, JiaFang, YiFang, ProjectName, then the optional
/// ContractName, ContractMoneyUpLimit, ContractMoneyDownLimit and UnionMember.
/// </summary>
/// <param name="str">The tab-separated line; the first four columns are required.</param>
/// <returns>The populated record; optional columns that are absent stay unset.</returns>
public static ContractRec ConvertFromString(string str)
{
    var fields = str.Split("\t");
    var c = new ContractRec();
    c.Id = fields[0];
    c.JiaFang = fields[1];
    c.YiFang = fields[2];
    c.ProjectName = fields[3];

    // Each optional column is read whenever it is present, so a line that stops
    // early or carries extra trailing fields no longer loses earlier columns.
    if (fields.Length > 4)
    {
        c.ContractName = fields[4];
    }
    if (fields.Length > 5)
    {
        c.ContractMoneyUpLimit = fields[5];
    }
    if (fields.Length > 6)
    {
        c.ContractMoneyDownLimit = fields[6];
    }
    if (fields.Length > 7)
    {
        c.UnionMember = fields[7];
    }
    return c;
}
/// <summary>
/// Extracts a single contract record from the current announcement:
/// sets <c>contractType</c> from the first sentence mentioning "中标" or "合同",
/// then fills Party B, Party A, project name, contract name, amount and
/// consortium member, applying the clean-up rules below in order.
/// </summary>
/// <returns>The populated contract record.</returns>
ContractRec ExtractSingle()
{
    // The first sentence containing either keyword decides the type; "中标" is checked first.
    contractType = String.Empty;
    foreach (var paragraph in root.Children)
    {
        foreach (var sentence in paragraph.Children)
        {
            if (sentence.Content.Contains("中标"))
            {
                contractType = "中标";
            }
            else if (sentence.Content.Contains("合同"))
            {
                contractType = "合同";
            }
            if (contractType != String.Empty)
            {
                break;
            }
        }
        if (contractType != String.Empty)
        {
            break;
        }
    }
    if (contractType == String.Empty)
    {
        Console.WriteLine($"contractType Null:{Id}");
    }

    var rec = new ContractRec();
    // Announcement ID
    rec.Id = Id;

    // Party B
    string yiFang = GetYiFang();
    if (yiFang.Contains("本公司"))
    {
        yiFang = string.Empty;
    }
    yiFang = CompanyNameLogic.AfterProcessFullName(yiFang).secFullName;
    yiFang = yiFang.NormalizeTextResult();
    // Brackets are removed as the output rules require.
    yiFang = RegularTool.TrimBrackets(yiFang);
    if (yiFang.Length < 3)
    {
        yiFang = string.Empty;
    }
    rec.YiFang = yiFang;

    // Party A
    string jiaFang = GetJiaFang(rec.YiFang);
    if (jiaFang.Contains("本公司"))
    {
        jiaFang = string.Empty;
    }
    jiaFang = CompanyNameLogic.AfterProcessFullName(jiaFang).secFullName;
    jiaFang = jiaFang.NormalizeTextResult();
    if (jiaFang.Contains("简称"))
    {
        jiaFang = Utility.GetStringBefore(jiaFang, "(");
    }
    // Organization list: when Party A is not a recognized organization but
    // "国家电网公司" is, that organization is used instead.
    if (Nerlist != null)
    {
        var orgNames = Nerlist.Where((n) => n.Type == LTPTrainingNER.enmNerType.Ni).Select((m) => m.RawData);
        if (!orgNames.Contains(jiaFang) && orgNames.Contains("国家电网公司"))
        {
            jiaFang = "国家电网公司";
        }
    }
    rec.JiaFang = jiaFang;

    // Project name
    string projectName = GetProjectName();
    projectName = projectName.NormalizeTextResult();
    if (projectName.StartsWith("“") && projectName.EndsWith("”"))
    {
        projectName = projectName.TrimStart('“').TrimEnd('”');
    }
    // NOTE(review): taking the text *after* a suffix looks suspicious - confirm Utility.GetStringAfter semantics.
    if (projectName.EndsWith(",签约双方"))
    {
        projectName = Utility.GetStringAfter(projectName, ",签约双方");
    }
    if (projectName.Contains("(以下简称"))
    {
        projectName = Utility.GetStringAfter(projectName, "(以下简称");
    }
    if (projectName.EndsWith(")"))
    {
        // Cut off a trailing bid-number / contract-number parenthetical.
        if (projectName.Contains("(招标编号"))
        {
            projectName = Utility.GetStringBefore(projectName, "(招标编号");
        }
        if (projectName.Contains("(合同编号"))
        {
            projectName = Utility.GetStringBefore(projectName, "(合同编号");
        }
    }
    // Special-case clean-up.
    projectName = projectName
        .Replace("的推荐中标", "")
        .Replace("<1>", "1、")
        .Replace("“", "")
        .Replace("”", "");

    // Contract name
    string contractName = GetContractName();
    if (contractName.StartsWith("“") && contractName.EndsWith("”"))
    {
        contractName = contractName.TrimStart('“').TrimEnd('”');
    }
    // Remove book-title marks.
    contractName = contractName.Replace("《", String.Empty).Replace("》", String.Empty);
    contractName = contractName.NormalizeTextResult();
    if (contractName.Contains("(以下简称"))
    {
        contractName = Utility.GetStringAfter(contractName, "(以下简称");
    }
    contractName = ExtendContractName(contractName);

    // For a purchase agreement the project name is cleared, unless it names a bid section ("标段").
    if (contractName.Contains("采购") && !projectName.Contains("标段"))
    {
        projectName = string.Empty;
    }
    rec.ProjectName = projectName;
    rec.ContractName = contractName;

    // Amount: upper and lower limits share the same value.
    var money = GetMoney();
    rec.ContractMoneyUpLimit = MoneyUtility.Format(money.MoneyAmount, String.Empty);
    rec.ContractMoneyDownLimit = rec.ContractMoneyUpLimit;

    // Consortium member, with brackets removed as the output rules require.
    rec.UnionMember = RegularTool.TrimBrackets(GetUnionMember(rec).NormalizeTextResult());

    // When Party B holds several names, the first stays Party B and the
    // remainder becomes the consortium member.
    var yiFangParts = rec.YiFang.Split(Utility.SplitChar);
    if (yiFangParts.Length > 1)
    {
        rec.UnionMember = Utility.GetStringAfter(rec.YiFang, Utility.SplitChar);
        rec.YiFang = yiFangParts[0];
        Console.WriteLine($"联合体:{rec.UnionMember}");
    }
    return rec;
}
/// <summary>
/// Multi-contract extraction for CNR ("北车") announcements.
/// After a sentence containing "若干项重大合同" has been seen, each sentence
/// starting with "&lt;" is treated as one contract item. A record is kept only
/// when Party A, Party B and exactly one money amount were found for it.
/// </summary>
/// <returns>The extracted records; empty when nothing qualified.</returns>
List<RecordBase> ExtractMultiFromNorthVehicle()
{
    // Main-contract extraction (CNR only). Announcements #151135, #153045,
    // #153271 and #175840 all carry the "若干项重大合同" marker.

    // Contract names recognized inside an item sentence; the first one contained wins.
    var knownContractNames = new string[]
    {
        "地铁车辆出口合同", "地铁车辆牵引系统销售合同", "地铁车辆销售合同", "地铁销售合同",
        "电动客车销售合同", "电力机车销售合同", "动车组检修合同", "动车组销售合同",
        "风力发电机组销售合同", "货车出口合同", "货车检修合同", "货车销售合同",
        "货车修理合同", "机车出口合同", "机车大修及加改合同", "客车检修合同",
        "客车销售合同", "客车修理合同", "煤炭漏斗车销售合同", "内燃电传动机车销售合同",
        "内燃动车组销售合同", "内燃机车订单", "铁路客车修理合同", "有轨电车销售合同"
    };

    var found = new List<RecordBase>();
    var inMultiSection = false;

    foreach (var paragraph in root.Children)
    {
        foreach (var sentence in paragraph.Children)
        {
            if (sentence.Content.Contains("若干项重大合同"))
            {
                inMultiSection = true;
                Console.WriteLine("若干项重大合同 ID:" + Id);
            }
            if (!inMultiSection || !sentence.Content.StartsWith("<"))
            {
                continue;
            }

            var rec = new ContractRec();
            rec.Id = Id;

            // Party A is the text between "与" and "签订", as in
            // "... <subsidiary> 与 <counterparty> 签订 ... contract".
            var withPos = sentence.Content.IndexOf("与");
            var signPos = sentence.Content.IndexOf("签订");
            if (withPos != -1 && signPos != -1 && withPos < signPos)
            {
                rec.JiaFang = sentence.Content.Substring(withPos + 1, signPos - withPos - 1);
            }

            // Party B is a subsidiary located in this sentence; the last match is kept.
            foreach (var company in companynamelist)
            {
                if (company.isSubCompany && company.positionId == sentence.PositionId)
                {
                    rec.YiFang = company.secFullName;
                }
            }

            var amountsHere = moneylist.Where((x) => x.Loc == sentence.PositionId).ToList();

            var matchedName = knownContractNames.FirstOrDefault((candidate) => sentence.Content.Contains(candidate));
            if (matchedName != null)
            {
                rec.ContractName = matchedName;
            }

            if (amountsHere.Count == 1
                && !String.IsNullOrEmpty(rec.JiaFang)
                && !String.IsNullOrEmpty(rec.YiFang))
            {
                rec.ContractMoneyUpLimit = MoneyUtility.Format(amountsHere.First().Value.MoneyAmount, String.Empty);
                rec.ContractMoneyDownLimit = rec.ContractMoneyUpLimit;
                found.Add(rec);
            }
        }
    }

    return found;
}
/// <summary>
/// Extracts contract records from HTML tables by column title.
/// Columns titled "采购人" (Party A) and "中标人" (Party B) are required;
/// "项目名称" and "中标金额" are optional. One record is produced per row
/// returned by <c>HTMLTable.GetMultiInfoByTitleRules</c>.
/// </summary>
/// <returns>The extracted records; empty when no table matched.</returns>
List<RecordBase> ExtractMultiFromTable()
{
    var partyARule = new TableSearchTitleRule
    {
        Name = "甲方",
        Title = new List<string> { "采购人" },
        IsTitleEq = false,
        IsRequire = true
    };
    // Other titles once considered for this column: "投资者名称", "股东名称".
    var partyBRule = new TableSearchTitleRule
    {
        Name = "乙方",
        Title = new List<string> { "中标人" },
        IsTitleEq = false,
        IsRequire = true
    };
    var projectRule = new TableSearchTitleRule
    {
        Name = "项目名称",
        Title = new List<string> { "项目名称" },
        IsTitleEq = false,
        IsRequire = false
    };
    var amountRule = new TableSearchTitleRule
    {
        Name = "中标金额",
        Title = new List<string> { "中标金额" },
        IsTitleEq = false,
        IsRequire = false
    };

    // Rule order fixes the cell index used below: 0 Party A, 1 Party B, 2 project, 3 amount.
    var rules = new List<TableSearchTitleRule> { partyARule, partyBRule, projectRule, amountRule };
    var options = new SearchOption { IsMeger = false };

    var found = new List<RecordBase>();
    var rows = HTMLTable.GetMultiInfoByTitleRules(root, rules, options);
    if (rows.Count <= 0)
    {
        return found;
    }

    Console.WriteLine("Table ExtractMulti ID:" + Id);
    foreach (var row in rows)
    {
        var rec = new ContractRec();
        rec.Id = Id;
        rec.JiaFang = row[0].RawData.NormalizeTextResult();
        rec.YiFang = row[1].RawData.NormalizeTextResult();

        // Replace a short name by the first non-empty full name registered for it.
        foreach (var company in companynamelist)
        {
            if (!String.IsNullOrEmpty(company.secShortName)
                && company.secShortName.Equals(rec.YiFang)
                && !string.IsNullOrEmpty(company.secFullName))
            {
                rec.YiFang = company.secFullName;
                break;
            }
        }

        rec.ProjectName = row[2].RawData.NormalizeTextResult();
        // The column title is passed along with the cell text to the formatter.
        rec.ContractMoneyUpLimit = MoneyUtility.Format(row[3].RawData, row[3].Title);
        rec.ContractMoneyDownLimit = rec.ContractMoneyUpLimit;
        found.Add(rec);
    }
    return found;
}
/// <summary>
/// Extracts a single contract record from the given document tree:
/// sets <c>contractType</c> from the first sentence mentioning "中标" or "合同",
/// then fills Party A, Party B, project name, contract name, amount and
/// consortium member.
/// </summary>
/// <param name="root">Document root whose children are paragraphs of sentences.</param>
/// <param name="Id">Announcement ID stored on the record and used in log output.</param>
/// <returns>The populated contract record.</returns>
ContractRec ExtractSingle(MyRootHtmlNode root, String Id)
{
    // The first sentence containing either keyword decides the type; "中标" is checked first.
    contractType = String.Empty;
    foreach (var paragrah in root.Children)
    {
        foreach (var item in paragrah.Children)
        {
            if (item.Content.Contains("中标"))
            {
                contractType = "中标";
                break;
            }
            if (item.Content.Contains("合同"))
            {
                contractType = "合同";
                break;
            }
        }
        if (contractType != String.Empty)
        {
            break;
        }
    }
    if (contractType == String.Empty)
    {
        Console.WriteLine("contractType Null:" + Id);
    }

    var contract = new ContractRec();
    // Announcement ID
    contract.Id = Id;

    // Party A
    contract.JiaFang = GetJiaFang();
    contract.JiaFang = CompanyNameLogic.AfterProcessFullName(contract.JiaFang).secFullName;
    contract.JiaFang = contract.JiaFang.NormalizeTextResult();

    // Organization list. The null guard matches the parameterless overload, and
    // the query is materialized once because it is searched twice.
    if (Nerlist != null)
    {
        var NiList = Nerlist.Where((n) => n.Type == LTPTrainingNER.enmNerType.Ni).Select((m) => m.RawData).ToList();
        if (!NiList.Contains(contract.JiaFang))
        {
            // As a special entity, "国家电网公司" is normally Party A.
            if (NiList.Contains("国家电网公司"))
            {
                contract.JiaFang = "国家电网公司";
            }
        }
    }

    // Party B
    contract.YiFang = GetYiFang();
    contract.YiFang = CompanyNameLogic.AfterProcessFullName(contract.YiFang).secFullName;
    contract.YiFang = contract.YiFang.NormalizeTextResult();
    // Brackets are removed as the output rules require.
    contract.YiFang = RegularTool.TrimBrackets(contract.YiFang);

    // Project name
    contract.ProjectName = GetProjectName();
    if (contract.ProjectName.StartsWith("“") && contract.ProjectName.EndsWith("”"))
    {
        contract.ProjectName = contract.ProjectName.TrimStart("“".ToCharArray()).TrimEnd("”".ToCharArray());
    }
    // NOTE(review): taking the text *after* a suffix looks suspicious - confirm Utility.GetStringAfter semantics.
    if (contract.ProjectName.EndsWith(",签约双方"))
    {
        contract.ProjectName = Utility.GetStringAfter(contract.ProjectName, ",签约双方");
    }
    if (contract.ProjectName.Contains("(以下简称"))
    {
        contract.ProjectName = Utility.GetStringAfter(contract.ProjectName, "(以下简称");
    }
    contract.ProjectName = contract.ProjectName.NormalizeTextResult();

    // Contract name
    if (contractType == "中标")
    {
        // Per data analysis: the project name is filled for bid awards,
        // the contract name only for contracts.
        contract.ContractName = String.Empty;
    }
    else
    {
        contract.ContractName = GetContractName();
        if (contract.ContractName.StartsWith("“") && contract.ContractName.EndsWith("”"))
        {
            contract.ContractName = contract.ContractName.TrimStart("“".ToCharArray()).TrimEnd("”".ToCharArray());
        }
        // Remove book-title marks.
        contract.ContractName = contract.ContractName.Replace("《", String.Empty).Replace("》", String.Empty);
        if (contract.ContractName.Contains("(以下简称"))
        {
            contract.ContractName = Utility.GetStringAfter(contract.ContractName, "(以下简称");
        }
        contract.ContractName = contract.ContractName.NormalizeTextResult();
    }

    // Amount: upper and lower limits share the same value.
    var money = GetMoney();
    contract.ContractMoneyUpLimit = MoneyUtility.Format(money.MoneyAmount, String.Empty);
    contract.ContractMoneyDownLimit = contract.ContractMoneyUpLimit;

    // Consortium member
    contract.UnionMember = GetUnionMember(contract.JiaFang, contract.YiFang);
    contract.UnionMember = contract.UnionMember.NormalizeTextResult();
    // Brackets are removed as the output rules require.
    contract.UnionMember = RegularTool.TrimBrackets(contract.UnionMember);
    return contract;
}