/// <summary>
/// Builds one contract record per single-amount node that follows a "multiple contracts" marker.
/// Every grandchild of <c>root</c> is visited in document order. Until a marker has been seen, a node is
/// only tested for a marker: exactly one match of <c>中标通知书\d份</c> in its normalized content, or the
/// literal text 履行进展情况 in its raw content. The node that sets the marker is not itself examined for
/// amounts. After the marker, each node whose <c>PositionId</c> is present in
/// <c>nermap.ParagraghlocateDict</c> with exactly one entry in <c>moneylist</c> yields a record.
/// </summary>
/// <returns>
/// The records created, each carrying <c>Id</c>, the JiaFang/YiFang values of the record returned by
/// <c>ExtractSingle()</c>, and the same formatted amount as both upper and lower limit. Empty when no
/// marker was found or no node qualified.
/// </returns>
List<RecordBase> ExtractMultiCommon()
{
    // Example triggers noted by the original author: "三项订单", "中标通知书6份", "中标通知书四份", "履行进展情况".
    var mainRecord = ExtractSingle();
    var records = new List<RecordBase>();
    var markerSeen = false;

    foreach (var outerNode in root.Children)
    {
        foreach (var node in outerNode.Children)
        {
            if (!markerSeen)
            {
                // NOTE(review): ConvertUpperToLower presumably turns Chinese numerals into digits so that
                // \d can match text such as "四份" - confirm against NumberUtility.
                var normalized = NumberUtility.ConvertUpperToLower(node.Content).Replace(" ", "");
                var noticeMatches = RegularTool.GetRegular(normalized, "中标通知书\\d份");
                if (noticeMatches.Count == 1)
                {
                    Console.WriteLine(Id + ":" + noticeMatches[0].RawData + "[" + normalized + "]");
                    markerSeen = true;
                }
                if (node.Content.Contains("履行进展情况"))
                {
                    Console.WriteLine(Id + ":履行进展情况");
                    markerSeen = true;
                }
                continue;
            }

            if (!nermap.ParagraghlocateDict.ContainsKey(node.PositionId))
            {
                continue;
            }

            var entities = nermap.ParagraghlocateDict[node.PositionId];
            if (entities.moneylist.Count != 1)
            {
                // Only nodes with exactly one money item produce a record.
                continue;
            }

            var record = new ContractRec();
            record.Id = Id;
            record.JiaFang = mainRecord.JiaFang;
            record.YiFang = mainRecord.YiFang;
            record.ContractMoneyUpLimit = MoneyUtility.Format(entities.moneylist.First().Value.MoneyAmount, String.Empty);
            // Lower limit is read back from the upper limit, so both always agree.
            record.ContractMoneyDownLimit = record.ContractMoneyUpLimit;
            records.Add(record);
        }
    }

    return records;
}
/// <summary>
/// Regular-expression search with optional leading-word and trailing-word filters.
/// Each match of <c>regularfeature.RegularExpress</c> in <paramref name="OrgString"/> is kept only if
/// (a) <c>LeadingWordList</c> is null or one of its words sits immediately before the match, and
/// (b) <c>TrailingWordList</c> is null or one of its words sits immediately after the match.
/// The first word in list order that fits is used. A non-null list that contains no fitting word
/// (including an empty list) rejects the match.
/// </summary>
/// <param name="loc">Value copied unchanged into the <c>Loc</c> member of every result.</param>
/// <param name="OrgString">Text to search.</param>
/// <param name="regularfeature">Supplies the pattern and the optional leading/trailing word lists.</param>
/// <param name="SplitChar">Separator placed between leading word, matched text and trailing word in <c>Value</c>.</param>
/// <returns>
/// One entry per accepted match: <c>Value</c> is leading word + separator + match + separator + trailing word,
/// and <c>StartIdx</c> is the match index minus the leading word's length.
/// </returns>
public static List<LocAndValue<String>> RegularExFinder(int loc, string OrgString, struRegularExpressFeature regularfeature, string SplitChar = "")
{
    var found = new List<LocAndValue<String>>();
    var hits = RegularTool.GetRegular(OrgString, regularfeature.RegularExpress);

    foreach (var hit in hits)
    {
        // Leading-word filter.
        string prefix = "";
        if (regularfeature.LeadingWordList != null)
        {
            bool prefixMatched = false;
            foreach (var leading in regularfeature.LeadingWordList)
            {
                int prefixStart = hit.Index - leading.Length;
                if (prefixStart < 0)
                {
                    // Not enough text before the match to hold this word.
                    continue;
                }
                if (OrgString.Substring(prefixStart, leading.Length).Equals(leading))
                {
                    prefix = leading;
                    prefixMatched = true;
                    break;
                }
            }
            if (!prefixMatched)
            {
                continue;
            }
        }

        // Trailing-word filter.
        string suffix = "";
        if (regularfeature.TrailingWordList != null)
        {
            bool suffixMatched = false;
            foreach (var trailing in regularfeature.TrailingWordList)
            {
                if (hit.Index + hit.Length + trailing.Length > OrgString.Length)
                {
                    // Not enough text after the match to hold this word.
                    continue;
                }
                if (OrgString.Substring(hit.Index + hit.Length, trailing.Length).Equals(trailing))
                {
                    suffix = trailing;
                    suffixMatched = true;
                    break;
                }
            }
            if (!suffixMatched)
            {
                continue;
            }
        }

        found.Add(new LocAndValue<String>()
        {
            Value = prefix + SplitChar + hit.RawData + SplitChar + suffix,
            StartIdx = hit.Index - prefix.Length,
            Loc = loc
        });
    }

    return found;
}
/// <summary>
/// Extracts share-change records from tables by matching column header titles.
/// Four title rules are searched under <c>root</c>: holder name, change end date, change price and change
/// number. If that search returns nothing, the search is repeated without the holder-name rule (with the
/// date rule marked required) and the holder name from <c>GetHolderName()</c> is put in front of every row.
/// Each row is then normalized into a <c>StockChangeRec</c>; rows without a holder full name or end date,
/// or with a number or price equal to "0", are dropped. Finally, for every distinct holder full name, the
/// record with the greatest end date receives the after-change holding count and percent from the matching
/// entries of <c>GetHolderAfter()</c>.
/// </summary>
/// <returns>The extracted records; an empty list when no table rows or no holder name could be found.</returns>
List<RecordBase> ExtractFromTable()
{
    // Holder name column: title must match exactly and the column is required.
    var StockHolderRule = new TableSearchTitleRule();
    StockHolderRule.Name = "股东全称";
    StockHolderRule.Title = new string[] { "股东名称", "名称", "增持主体", "增持人", "减持主体", "减持人", "姓名" }.ToList();
    StockHolderRule.IsTitleEq = true;
    StockHolderRule.IsRequire = true;

    // Change end date column.
    var ChangeDateRule = new TableSearchTitleRule();
    ChangeDateRule.Name = "变动截止日期";
    ChangeDateRule.Title = new string[] { "买卖时间", "日期", "减持期间", "增持期间", "减持股份期间", "增持股份期间", "减持时间", "增持时间", "减持股份时间", "增持股份时间", "买入时间", "卖出时间" }.ToList();
    ChangeDateRule.IsTitleEq = false;
    ChangeDateRule.Normalize = NormailizeEndChangeDate;

    // Change price column.
    var ChangePriceRule = new TableSearchTitleRule();
    ChangePriceRule.Name = "变动价格";
    ChangePriceRule.Title = new string[] { "买入均价", "卖出均价", "成交均价", "减持价格", "增持价格", "减持股均价", "增持股均价", "减持均", "增持均", "价格区间" }.ToList();
    ChangePriceRule.IsTitleEq = false;
    ChangePriceRule.Normalize = (x, y) =>
    {
        var prices = RegularTool.GetRegular(x, RegularTool.MoneyExpress);
        if (prices.Count == 0)
        {
            if (x.Contains("元"))
            {
                return Utility.GetStringBefore(x, "元");
            }
        }
        else
        {
            // For a price range take the highest price, assuming the last number is the largest.
            return prices.Last().RawData;
        }
        return x;
    };

    // Change number column.
    var ChangeNumberRule = new TableSearchTitleRule();
    ChangeNumberRule.Name = "变动数量";
    ChangeNumberRule.Title = new string[] { "成交数量", "减持股数", "增持股数", "减持数量", "增持数量", "买入股份数", "卖出股份数", "股数" }.ToList();
    ChangeNumberRule.IsTitleEq = false;
    ChangeNumberRule.Normalize = NumberUtility.NormalizerStockNumber;

    var Rules = new List<TableSearchTitleRule>();
    Rules.Add(StockHolderRule);
    Rules.Add(ChangeDateRule);
    Rules.Add(ChangePriceRule);
    Rules.Add(ChangeNumberRule);
    var result = HTMLTable.GetMultiInfoByTitleRules(root, Rules, false);

    if (result.Count == 0)
    {
        // Nothing extracted: retry without the holder-name column and require the date column instead.
        Rules.Clear();
        ChangeDateRule.IsRequire = true;
        Rules.Add(ChangeDateRule);
        Rules.Add(ChangePriceRule);
        Rules.Add(ChangeNumberRule);
        result = HTMLTable.GetMultiInfoByTitleRules(root, Rules, false);
        if (result.Count == 0)
        {
            return new List<RecordBase>();
        }

        var NewResult = new List<CellInfo[]>();
        var Name = GetHolderName();
        if (String.IsNullOrEmpty(Name.FullName) && String.IsNullOrEmpty(Name.ShortName))
        {
            return new List<RecordBase>();
        }
        foreach (var item in result)
        {
            // Prepend a synthetic holder-name cell so rows have the same four-column layout as the first search.
            NewResult.Add(new CellInfo[]
            {
                new CellInfo() { RawData = String.IsNullOrEmpty(Name.FullName) ? Name.ShortName : Name.FullName },
                item[0],
                item[1],
                item[2]
            });
        }
        result = NewResult;
    }

    var holderafterlist = GetHolderAfter();
    // Names from the after-change list; computed once because the list is not modified inside the row loop.
    var holderAfterNames = holderafterlist.Select((z) => { return z.Name; }).ToList();

    var stockchangelist = new List<RecordBase>();
    foreach (var rec in result)
    {
        var stockchange = new StockChangeRec();
        stockchange.Id = Id;

        var ModifyName = rec[0].RawData;
        // Long names inside a table may be cut by a page break; validate against the after-change list
        // and replace a truncated name by the full name that starts or ends with it.
        if (!holderAfterNames.Contains(ModifyName))
        {
            foreach (var item in holderafterlist)
            {
                if (item.Name.EndsWith("先生"))
                {
                    break; // Special case kept from the original; the author notes it has no real rationale.
                }
                if (item.Name.StartsWith(ModifyName) && !item.Name.Equals(ModifyName))
                {
                    ModifyName = item.Name;
                    break;
                }
                if (item.Name.EndsWith(ModifyName) && !item.Name.Equals(ModifyName))
                {
                    ModifyName = item.Name;
                    break;
                }
            }
        }

        var Name = CompanyNameLogic.NormalizeCompanyName(this, ModifyName);
        stockchange.HolderFullName = Name.FullName.NormalizeTextResult();
        stockchange.HolderShortName = Name.ShortName;
        if (stockchange.HolderFullName.Contains("简称"))
        {
            // The full name embeds a short name: split it off and strip the closing bracket and quotes.
            stockchange.HolderShortName = Utility.GetStringAfter(stockchange.HolderFullName, "简称");
            stockchange.HolderShortName = stockchange.HolderShortName.Replace(")", String.Empty).Replace("“", String.Empty).Replace("”", String.Empty);
            stockchange.HolderFullName = Utility.GetStringBefore(stockchange.HolderFullName, "(");
        }

        stockchange.ChangeEndDate = rec[1].RawData;
        DateTime parsedDate;
        if (!DateTime.TryParse(stockchange.ChangeEndDate, out parsedDate))
        {
            // Unparseable date: keep the raw text only in debug mode.
            if (!Program.IsDebugMode)
            {
                stockchange.ChangeEndDate = String.Empty;
            }
        }

        if (!String.IsNullOrEmpty(rec[2].RawData))
        {
            // Price ranges are skipped.
            if (!(rec[2].RawData.Contains("-") || rec[2].RawData.Contains("~") || rec[2].RawData.Contains("至")))
            {
                stockchange.ChangePrice = rec[2].RawData.Replace(" ", String.Empty);
                stockchange.ChangePrice = stockchange.ChangePrice.Replace("*", "");
                stockchange.ChangePrice = stockchange.ChangePrice.NormalizeNumberResult();
            }
        }
        if (!RegularTool.IsUnsign(stockchange.ChangePrice))
        {
            if (!String.IsNullOrEmpty(stockchange.ChangePrice))
            {
                Console.WriteLine("Error ChangePrice:[" + stockchange.ChangePrice + "]");
            }
            stockchange.ChangePrice = String.Empty;
        }

        if (!String.IsNullOrEmpty(rec[3].RawData))
        {
            stockchange.ChangeNumber = rec[3].RawData.Replace(" ", String.Empty);
            stockchange.ChangeNumber = stockchange.ChangeNumber.NormalizeNumberResult();
            if (!RegularTool.IsUnsign(stockchange.ChangeNumber))
            {
                if (!String.IsNullOrEmpty(stockchange.ChangeNumber))
                {
                    Console.WriteLine("Error ChangeNumber:[" + stockchange.ChangeNumber + "]");
                }
                stockchange.ChangeNumber = String.Empty;
            }
        }

        // Nearly every valid record has a holder name and an end date. Dropping the others may hurt the
        // rare record without an end date but improves the overall metrics (original author's note).
        if (string.IsNullOrEmpty(stockchange.HolderFullName) || string.IsNullOrEmpty(stockchange.ChangeEndDate))
        {
            continue;
        }
        if (stockchange.ChangeNumber == "0" || stockchange.ChangePrice == "0")
        {
            continue;
        }
        stockchangelist.Add(stockchange);
    }

    // Collect every distinct holder full name.
    var namelist = stockchangelist.Select(x => ((StockChangeRec)x).HolderFullName).Distinct().ToList();
    var newRec = new List<StockChangeRec>();
    foreach (var name in namelist)
    {
        var stocklist = stockchangelist.Where((x) => { return ((StockChangeRec)x).HolderFullName == name; }).ToList();

        // Pick the record with the greatest end date. The previous comparison compared each record's date
        // with itself (always 0), so the list was never ordered. OrderBy is a stable sort, so among records
        // with the same date the one latest in document order is chosen; ordinal comparison keeps the
        // result independent of the current culture.
        var last = (StockChangeRec)stocklist
            .OrderBy(x => ((StockChangeRec)x).ChangeEndDate, StringComparer.Ordinal)
            .Last();

        for (int i = 0; i < holderafterlist.Count; i++)
        {
            var after = holderafterlist[i];
            after.Name = after.Name.Replace(" ", "");
            if (after.Name == last.HolderFullName || after.Name == last.HolderShortName)
            {
                // Original note: the record is a struct and cannot be modified in place, hence remove
                // and re-add. NOTE(review): confirm whether StockChangeRec is still a value type; every
                // matching entry adds the record to newRec again.
                stockchangelist.Remove(last);
                last.HoldNumberAfterChange = after.Count;
                last.HoldPercentAfterChange = after.Percent;
                newRec.Add(last);
            }
        }
    }

    if (holderafterlist.Count != namelist.Count)
    {
        // Holder counts disagree between the change table and the after-change list; log for manual review.
        if (!Program.IsMultiThreadMode)
        {
            Program.Logger.WriteLine("增持者数量确认!");
        }
    }

    stockchangelist.AddRange(newRec);
    return stockchangelist;
}