/// <summary>
/// Fills <c>PersonList</c> with candidate trade counterparties taken from two sources:
/// cells found under the <c>TradeCompany</c> header rule, and entries of <c>ExplainDict</c>
/// whose value mentions "自然人" (natural person).
/// </summary>
/// <remarks>
/// Side effects: sets <c>TradeCompany.IsRequire</c> to true and appends to <c>PersonList</c>.
/// Null, empty or whitespace-only names are never appended.
/// </remarks>
void GetPersonList()
{
    // --- Source 1: cells under the trade-counterparty table header ---
    TradeCompany.IsRequire = true;
    var Rules = new List<TableSearchTitleRule> { TradeCompany };
    var opt = new HTMLTable.SearchOption();
    opt.IsMeger = true;
    opt.IsContainTotalRow = true;
    var result = HTMLTable.GetMultiInfoByTitleRules(root, Rules, opt);
    if (result.Count != 0)
    {
        // Original author's notes: the counterparty term is also an entry of the definitions
        // table and gets misdetected as a header, so rows coming from ExplainTableId are dropped.
        // TODO (original): only tables near the beginning of the document should be considered.
        var TableTrades = result.Where(z => !ExplainTableId.Contains(z[0].TableId))
                                .Select(x => x[0].RawData)
                                // A null cell would make Contains throw; an empty one is not a name.
                                .Where(y => !String.IsNullOrWhiteSpace(y) && !y.Contains("不超过"))
                                .ToList();
        PersonList.AddRange(TableTrades);
    }

    // --- Source 2: definition entries that list natural persons ---
    foreach (var e in ExplainDict)
    {
        if (!e.Value.Contains("自然人"))
        {
            continue;
        }
        foreach (var person in e.Value.Split(Utility.SplitChar))
        {
            // Cut the token at "等" and then at "自然人" when present, using
            // Utility.GetStringBefore; tokens containing neither marker are kept as-is.
            var trimPerson = person;
            if (trimPerson.Contains("等"))
            {
                trimPerson = Utility.GetStringBefore(trimPerson, "等");
            }
            if (trimPerson.Contains("自然人"))
            {
                trimPerson = Utility.GetStringBefore(trimPerson, "自然人");
            }
            // A token such as "自然人" alone leaves nothing after trimming; skip it
            // instead of polluting PersonList with an empty name.
            if (!String.IsNullOrWhiteSpace(trimPerson))
            {
                PersonList.Add(trimPerson);
            }
        }
    }
}
/// <summary>
/// Builds one <c>ContractRec</c> per table row matched by four header rules:
/// "采购人" (party A, required), "中标人" (party B, required),
/// "项目名称" (project name, optional) and "中标金额" (amount, optional).
/// </summary>
/// <returns>The extracted records; an empty list when no table row matches.</returns>
List<RecordBase> ExtractMultiFromTable()
{
    var partyARule = new TableSearchTitleRule
    {
        Name = "甲方",
        Title = new List<string> { "采购人" },
        IsTitleEq = false,
        IsRequire = true
    };
    // Candidate headers noted by the original author but not enabled:
    // "投资者名称" (investor name), "股东名称" (shareholder name).
    var partyBRule = new TableSearchTitleRule
    {
        Name = "乙方",
        Title = new List<string> { "中标人" },
        IsTitleEq = false,
        IsRequire = true
    };
    var projectRule = new TableSearchTitleRule
    {
        Name = "项目名称",
        Title = new List<string> { "项目名称" },
        IsTitleEq = false,
        IsRequire = false
    };
    var amountRule = new TableSearchTitleRule
    {
        Name = "中标金额",
        Title = new List<string> { "中标金额" },
        IsTitleEq = false,
        IsRequire = false
    };

    // Rule order fixes the cell order of every result row: [0]=A, [1]=B, [2]=project, [3]=amount.
    var searchRules = new List<TableSearchTitleRule> { partyARule, partyBRule, projectRule, amountRule };
    var searchOption = new SearchOption();
    searchOption.IsMeger = false;

    var Records = new List<RecordBase>();
    var rows = HTMLTable.GetMultiInfoByTitleRules(root, searchRules, searchOption);
    if (rows.Count == 0)
    {
        return Records;
    }

    Console.WriteLine("Table ExtractMulti ID:" + Id);
    foreach (var row in rows)
    {
        var contract = new ContractRec();
        contract.Id = Id;
        contract.JiaFang = row[0].RawData;
        contract.JiaFang = contract.JiaFang.NormalizeTextResult();
        contract.YiFang = row[1].RawData;
        contract.YiFang = contract.YiFang.NormalizeTextResult();

        // Replace a security short name by its full name: the first entry whose short name
        // equals party B and that has a non-empty full name wins.
        foreach (var cn in companynamelist)
        {
            if (String.IsNullOrEmpty(cn.secShortName) || !cn.secShortName.Equals(contract.YiFang))
            {
                continue;
            }
            if (string.IsNullOrEmpty(cn.secFullName))
            {
                continue;
            }
            contract.YiFang = cn.secFullName;
            break;
        }

        contract.ProjectName = row[2].RawData;
        contract.ProjectName = contract.ProjectName.NormalizeTextResult();
        // The same formatted amount is used for both the upper and the lower limit.
        contract.ContractMoneyUpLimit = MoneyUtility.Format(row[3].RawData, row[3].Title);
        contract.ContractMoneyDownLimit = contract.ContractMoneyUpLimit;
        Records.Add(contract);
    }
    return Records;
}
/// <summary>
/// Finds trade counterparties: names taken from the table matched by the <c>TradeCompany</c>
/// header rule are kept only when the document text contains the sequence
/// "counterparty ... holding word (持有/所持) ... target company" in one paragraph entry.
/// </summary>
/// <param name="target">
/// Reorganization record whose <c>TargetCompanyFullName</c> / <c>TargetCompanyShortName</c>
/// are used as the "target" keywords. Null or empty names are ignored.
/// </param>
/// <returns>Distinct counterparty names; an empty list when the header rule matches nothing.</returns>
/// <remarks>
/// Side effects: sets <c>TradeCompany.IsRequire</c> to true, appends the located words to
/// <c>CustomerList</c> and runs <c>nermap.Anlayze(this)</c>.
/// </remarks>
public List<string> getTradeCompany(ReorganizationRec target)
{
    var rtn = new List<string>();
    TradeCompany.IsRequire = true;
    var Rules = new List<TableSearchTitleRule> { TradeCompany };
    var opt = new HTMLTable.SearchOption();
    opt.IsMeger = true;
    var result = HTMLTable.GetMultiInfoByTitleRules(root, Rules, opt);
    if (result.Count == 0)
    {
        return rtn;
    }

    // Original author's notes: the counterparty term is also an entry of the definitions
    // table and gets misdetected as a header, so rows coming from ExplainTableId are dropped.
    // TODO (original): only tables near the beginning of the document should be considered.
    var TableTrades = result.Where(z => !ExplainTableId.Contains(z[0].TableId))
                            .Select(x => x[0].RawData)
                            // Null cells would make Contains throw; empty cells are useless keywords.
                            .Where(y => !String.IsNullOrEmpty(y) && !y.Contains("不超过"))
                            .ToList();

    // Only search for target names that are actually present on the record.
    var targetNames = new string[] { target.TargetCompanyFullName, target.TargetCompanyShortName }
                          .Where(n => !String.IsNullOrEmpty(n))
                          .ToList();

    var TargetLoc = LocateProperty.LocateCustomerWord(root, targetNames, "标的");
    var HolderLoc = LocateProperty.LocateCustomerWord(root, new string[] { "持有", "所持" }.ToList(), "持有");
    var OwnerLoc = LocateProperty.LocateCustomerWord(root, TableTrades, "交易对手");
    CustomerList.AddRange(TargetLoc);
    CustomerList.AddRange(HolderLoc);
    CustomerList.AddRange(OwnerLoc);
    nermap.Anlayze(this);

    foreach (var nerlist in nermap.ParagraghlocateDict.Values)
    {
        // Scan the located words in text order for: counterparty -> holding word -> target.
        int OwnerIdx = -1;
        int HolderIdx = -1;
        int TargetIdx = -1;
        nerlist.CustomerList.Sort((x, y) => x.StartIdx.CompareTo(y.StartIdx));
        var OwnerName = string.Empty;
        foreach (var ner in nerlist.CustomerList)
        {
            if (ner.Description == "交易对手")
            {
                // A later counterparty replaces the remembered one.
                // NOTE(review): HolderIdx is not reset here, so a holding word seen before this
                // counterparty still counts — confirm that this is intended.
                OwnerIdx = ner.StartIdx;
                OwnerName = ner.Value;
            }
            if (ner.Description == "持有" && OwnerIdx != -1)
            {
                HolderIdx = ner.StartIdx;
            }
            if (ner.Description == "标的" && OwnerIdx != -1 && HolderIdx != -1)
            {
                TargetIdx = ner.StartIdx;
            }
            if (OwnerIdx != -1 && HolderIdx != -1 && TargetIdx != -1)
            {
                // Accept only when the target starts fewer than 20 StartIdx units after the
                // counterparty (presumably characters — TODO confirm), then restart the scan.
                if (TargetIdx - OwnerIdx < 20)
                {
                    rtn.Add(OwnerName);
                }
                OwnerIdx = -1;
                HolderIdx = -1;
                TargetIdx = -1;
            }
        }
    }
    return rtn.Distinct().ToList();
}
/// <summary>
/// Extracts "holdings after the change" rows (holder name, share count, percentage) from tables.
/// </summary>
/// <remarks>
/// First pass: the holder column is matched by its own title, the count and percentage columns
/// by a super title containing "减持后"/"增持后". If that yields nothing, a second pass matches the
/// holder column by super title instead (its own title is left empty) and places the holder
/// rule last, so the cell order of each row becomes [count, percentage, holder].
/// </remarks>
/// <returns>One <c>struHoldAfter</c> per matched row with <c>Used</c> set to false; empty when nothing matches.</returns>
List<struHoldAfter> GetHolderAfter3rdStep()
{
    var HoldList = new List<struHoldAfter>();

    var StockHolderRule = new TableSearchTitleRule();
    StockHolderRule.Name = "股东全称";
    StockHolderRule.Title = new string[] { "股东名称", "名称", "增持主体", "增持人", "减持主体", "减持人" }.ToList();
    StockHolderRule.IsTitleEq = true;
    StockHolderRule.IsRequire = true;

    var HoldNumberAfterChangeRule = new TableSearchTitleRule();
    HoldNumberAfterChangeRule.Name = "变动后持股数";
    HoldNumberAfterChangeRule.IsRequire = true;
    HoldNumberAfterChangeRule.SuperTitle = new string[] { "减持后", "增持后" }.ToList();
    HoldNumberAfterChangeRule.IsSuperTitleEq = false;
    HoldNumberAfterChangeRule.Title = new string[] { "持股股数", "持股股数", "持股数量", "持股数量", "持股总数", "持股总数", "股数" }.ToList();
    HoldNumberAfterChangeRule.IsTitleEq = false;

    var HoldPercentAfterChangeRule = new TableSearchTitleRule();
    HoldPercentAfterChangeRule.Name = "变动后持股数比例";
    HoldPercentAfterChangeRule.IsRequire = true;
    HoldPercentAfterChangeRule.SuperTitle = HoldNumberAfterChangeRule.SuperTitle;
    HoldPercentAfterChangeRule.IsSuperTitleEq = false;
    HoldPercentAfterChangeRule.Title = new string[] { "比例" }.ToList();
    HoldPercentAfterChangeRule.IsTitleEq = false;

    // Positions of the three cells inside a result row; they follow the order of the rules.
    int nameIdx = 0;
    int countIdx = 1;
    int percentIdx = 2;

    var Rules = new List<TableSearchTitleRule> { StockHolderRule, HoldNumberAfterChangeRule, HoldPercentAfterChangeRule };
    var result = HTMLTable.GetMultiInfoByTitleRules(root, Rules, false);

    if (result.Count == 0)
    {
        // Second pass: look for the holder names in the super title.
        StockHolderRule.SuperTitle = StockHolderRule.Title;
        StockHolderRule.IsSuperTitleEq = true;
        // Assign a NEW empty list. Calling Title.Clear() here would also empty SuperTitle,
        // because both properties would reference the same list instance.
        StockHolderRule.Title = new List<string>();
        StockHolderRule.IsTitleEq = false;

        Rules = new List<TableSearchTitleRule> { HoldNumberAfterChangeRule, HoldPercentAfterChangeRule, StockHolderRule };
        nameIdx = 2;
        countIdx = 0;
        percentIdx = 1;
        result = HTMLTable.GetMultiInfoByTitleRules(root, Rules, false);
    }

    foreach (var item in result)
    {
        var HolderName = item[nameIdx].RawData;
        var strHolderCnt = Normalizer.NormalizeNumberResult(item[countIdx].RawData);
        // The header passed to getAfterstock must be the one of the COUNT column itself
        // (the second pass used the percentage column's header by mistake).
        string HolderCnt = getAfterstock(item[countIdx].Title, strHolderCnt);
        var HolderPercent = getAfterpercent(item[percentIdx].RawData);
        HoldList.Add(new struHoldAfter()
        {
            Name = HolderName,
            Count = HolderCnt,
            Percent = HolderPercent,
            Used = false
        });
    }
    return HoldList;
}
/// <summary>
/// Extracts stock-change records from tables by matching column header titles
/// (holder, change date, change price, change number).
/// </summary>
/// <remarks>
/// If no table matches with the holder column required, the search is repeated without the
/// holder column (date required instead) and the holder returned by <c>GetHolderName()</c> is
/// used for every row. For each holder, the record with the latest change date additionally
/// receives the post-change holdings found by <c>GetHolderAfter()</c>.
/// </remarks>
/// <returns>The extracted records; an empty list when nothing usable is found.</returns>
List<RecordBase> ExtractFromTable()
{
    var StockHolderRule = new TableSearchTitleRule();
    StockHolderRule.Name = "股东全称";
    StockHolderRule.Title = new string[] { "股东名称", "名称", "增持主体", "增持人", "减持主体", "减持人", "姓名" }.ToList();
    StockHolderRule.IsTitleEq = true;
    StockHolderRule.IsRequire = true;

    var ChangeDateRule = new TableSearchTitleRule();
    ChangeDateRule.Name = "变动截止日期";
    ChangeDateRule.Title = new string[] { "买卖时间", "日期", "减持期间", "增持期间", "减持股份期间", "增持股份期间", "减持时间", "增持时间", "减持股份时间", "增持股份时间", "买入时间", "卖出时间" }.ToList();
    ChangeDateRule.IsTitleEq = false;
    ChangeDateRule.Normalize = NormailizeEndChangeDate;

    var ChangePriceRule = new TableSearchTitleRule();
    ChangePriceRule.Name = "变动价格";
    ChangePriceRule.Title = new string[] { "买入均价", "卖出均价", "成交均价", "减持价格", "增持价格", "减持股均价", "增持股均价", "减持均", "增持均", "价格区间" }.ToList();
    ChangePriceRule.IsTitleEq = false;
    ChangePriceRule.Normalize = (x, y) =>
    {
        var prices = RegularTool.GetRegular(x, RegularTool.MoneyExpress);
        if (prices.Count == 0)
        {
            if (x.Contains("元"))
            {
                return Utility.GetStringBefore(x, "元");
            }
        }
        else
        {
            // For a price range take the highest price, assuming the last number is the largest.
            return prices.Last().RawData;
        }
        return x;
    };

    var ChangeNumberRule = new TableSearchTitleRule();
    ChangeNumberRule.Name = "变动数量";
    ChangeNumberRule.Title = new string[] { "成交数量", "减持股数", "增持股数", "减持数量", "增持数量", "买入股份数", "卖出股份数", "股数" }.ToList();
    ChangeNumberRule.IsTitleEq = false;
    ChangeNumberRule.Normalize = NumberUtility.NormalizerStockNumber;

    // Rule order fixes the cell order of every row: [0]=holder, [1]=date, [2]=price, [3]=number.
    var Rules = new List<TableSearchTitleRule>();
    Rules.Add(StockHolderRule);
    Rules.Add(ChangeDateRule);
    Rules.Add(ChangePriceRule);
    Rules.Add(ChangeNumberRule);
    var result = HTMLTable.GetMultiInfoByTitleRules(root, Rules, false);
    if (result.Count == 0)
    {
        // Nothing extracted: retry without the holder column, requiring the date column.
        Rules.Clear();
        ChangeDateRule.IsRequire = true;
        Rules.Add(ChangeDateRule);
        Rules.Add(ChangePriceRule);
        Rules.Add(ChangeNumberRule);
        result = HTMLTable.GetMultiInfoByTitleRules(root, Rules, false);
        if (result.Count == 0)
        {
            return new List<RecordBase>();
        }
        var fallbackHolder = GetHolderName();
        if (String.IsNullOrEmpty(fallbackHolder.FullName) && String.IsNullOrEmpty(fallbackHolder.ShortName))
        {
            return new List<RecordBase>();
        }
        // Prepend a synthetic holder cell so the rows have the same layout as in the first pass.
        var NewResult = new List<CellInfo[]>();
        foreach (var item in result)
        {
            NewResult.Add(new CellInfo[]
            {
                new CellInfo() { RawData = String.IsNullOrEmpty(fallbackHolder.FullName) ? fallbackHolder.ShortName : fallbackHolder.FullName },
                item[0],
                item[1],
                item[2]
            });
        }
        result = NewResult;
    }

    var holderafterlist = GetHolderAfter();
    var stockchangelist = new List<RecordBase>();
    foreach (var rec in result)
    {
        var stockchange = new StockChangeRec();
        stockchange.Id = Id;
        var ModifyName = rec[0].RawData;
        // A long name inside a table may have been cut by a page break;
        // use the "holdings after change" list to restore the complete name.
        if (!holderafterlist.Any(z => z.Name == ModifyName))
        {
            foreach (var item in holderafterlist)
            {
                if (item.Name.EndsWith("先生"))
                {
                    // Special case kept from the original author (no stated rationale):
                    // stop looking as soon as an entry ending with "先生" is met.
                    break;
                }
                if (!item.Name.Equals(ModifyName) &&
                    (item.Name.StartsWith(ModifyName) || item.Name.EndsWith(ModifyName)))
                {
                    ModifyName = item.Name;
                    break;
                }
            }
        }

        var normalizedName = CompanyNameLogic.NormalizeCompanyName(this, ModifyName);
        stockchange.HolderFullName = normalizedName.FullName.NormalizeTextResult();
        stockchange.HolderShortName = normalizedName.ShortName;
        if (stockchange.HolderFullName.Contains("简称"))
        {
            // The full name embeds the short name: split the two apart.
            stockchange.HolderShortName = Utility.GetStringAfter(stockchange.HolderFullName, "简称");
            stockchange.HolderShortName = stockchange.HolderShortName.Replace(")", String.Empty).Replace("“", String.Empty).Replace("”", String.Empty);
            stockchange.HolderFullName = Utility.GetStringBefore(stockchange.HolderFullName, "(");
        }

        stockchange.ChangeEndDate = rec[1].RawData;
        DateTime parsedEndDate;
        if (!DateTime.TryParse(stockchange.ChangeEndDate, out parsedEndDate))
        {
            // Unparseable date: outside debug mode it is blanked (the record is dropped below).
            if (!Program.IsDebugMode)
            {
                stockchange.ChangeEndDate = String.Empty;
            }
        }

        if (!String.IsNullOrEmpty(rec[2].RawData))
        {
            // Price ranges are not used as a price.
            if (!(rec[2].RawData.Contains("-") || rec[2].RawData.Contains("~") || rec[2].RawData.Contains("至")))
            {
                stockchange.ChangePrice = rec[2].RawData.Replace(" ", String.Empty);
                stockchange.ChangePrice = stockchange.ChangePrice.Replace("*", "");
                stockchange.ChangePrice = stockchange.ChangePrice.NormalizeNumberResult();
            }
        }
        if (!RegularTool.IsUnsign(stockchange.ChangePrice))
        {
            if (!String.IsNullOrEmpty(stockchange.ChangePrice))
            {
                Console.WriteLine("Error ChangePrice:[" + stockchange.ChangePrice + "]");
            }
            stockchange.ChangePrice = String.Empty;
        }

        if (!String.IsNullOrEmpty(rec[3].RawData))
        {
            stockchange.ChangeNumber = rec[3].RawData.Replace(" ", String.Empty);
            stockchange.ChangeNumber = stockchange.ChangeNumber.NormalizeNumberResult();
            if (!RegularTool.IsUnsign(stockchange.ChangeNumber))
            {
                if (!String.IsNullOrEmpty(stockchange.ChangeNumber))
                {
                    Console.WriteLine("Error ChangeNumber:[" + stockchange.ChangeNumber + "]");
                }
                stockchange.ChangeNumber = String.Empty;
            }
        }

        // Original author's note: practically every valid record has a holder name and an end
        // date, so records lacking either are dropped; this may hurt a few records without an
        // end date but is better for the overall result.
        if (string.IsNullOrEmpty(stockchange.HolderFullName) || string.IsNullOrEmpty(stockchange.ChangeEndDate))
        {
            continue;
        }
        if (stockchange.ChangeNumber == "0" || stockchange.ChangePrice == "0")
        {
            continue;
        }
        stockchangelist.Add(stockchange);
    }

    // For every distinct holder, attach the post-change holdings to the latest record.
    var namelist = stockchangelist.Select(x => ((StockChangeRec)x).HolderFullName).Distinct().ToList();
    var newRec = new List<StockChangeRec>();
    foreach (var name in namelist)
    {
        // Order by the parsed end date (the previous comparator compared a record with itself,
        // so nothing was ordered). OrderBy is stable: among equal dates the row that came
        // last in the table stays last. Unparseable dates (possible in debug mode) sort first.
        var last = (StockChangeRec)stockchangelist
            .Where(x => ((StockChangeRec)x).HolderFullName == name)
            .OrderBy(x =>
            {
                DateTime endDate;
                return DateTime.TryParse(((StockChangeRec)x).ChangeEndDate, out endDate) ? endDate : DateTime.MinValue;
            })
            .Last();

        bool matched = false;
        foreach (var after in holderafterlist)
        {
            var afterName = after.Name.Replace(" ", "");
            if (afterName == last.HolderFullName || afterName == last.HolderShortName)
            {
                // When several entries match, the last one wins (as before).
                last.HoldNumberAfterChange = after.Count;
                last.HoldPercentAfterChange = after.Percent;
                matched = true;
            }
        }
        if (matched)
        {
            // The original note says the record could not be modified in place (it was a struct),
            // hence the remove + re-add. It is re-added exactly once, even when several
            // entries matched, so the output contains no duplicate records.
            stockchangelist.Remove(last);
            newRec.Add(last);
        }
    }

    if (holderafterlist.Count != namelist.Count)
    {
        if (!Program.IsMultiThreadMode)
        {
            Program.Logger.WriteLine("增持者数量确认!");
        }
    }
    stockchangelist.AddRange(newRec);
    return stockchangelist;
}
/// <summary>
/// Builds one <c>IncreaseStockRec</c> per table row matched by the subscriber / number /
/// amount / lock-up / price header rules.
/// </summary>
/// <param name="root">Document root node that is searched for tables.</param>
/// <param name="SampleincreaseStock">Record whose <c>Id</c> and <c>BuyMethod</c> are copied into every result.</param>
/// <returns>The extracted records; rows with an empty subscriber, a number of "0" or an amount of "0" are skipped.</returns>
List<RecordBase> GetMultiTarget(HTMLEngine.MyRootHtmlNode root, IncreaseStockRec SampleincreaseStock)
{
    // Candidate headers noted by the original author but not enabled:
    // "投资者名称" (investor name), "股东名称" (shareholder name).
    var subscriberRule = new TableSearchTitleRule
    {
        Name = "认购对象",
        Title = new List<string> { "发行对象", "认购对象", "发行对象名称" },
        IsTitleEq = true,
        IsRequire = true
    };
    // For the remaining rules it is enough that the header contains the title.
    var numberRule = new TableSearchTitleRule
    {
        Name = "增发数量",
        Title = new List<string> { "配售股数", "认购数量", "认购股数", "认购股份数", "发行股份数", "配售数量" },
        IsTitleEq = false,
        Normalize = NumberUtility.NormalizerStockNumber
    };
    var moneyRule = new TableSearchTitleRule
    {
        Name = "增发金额",
        Title = new List<string> { "配售金额", "认购金额", "获配金额" },
        IsTitleEq = false,
        Normalize = MoneyUtility.Format
    };
    var freezeRule = new TableSearchTitleRule
    {
        Name = "锁定期",
        Title = new List<string> { "锁定期", "限售期" },
        IsTitleEq = false,
        Normalize = NormalizerFreezeYear
    };
    var priceRule = new TableSearchTitleRule
    {
        Name = "价格",
        Title = new List<string> { "认购价格", "配售价格", "申购报价" },
        IsTitleEq = false,
        Normalize = MoneyUtility.Format
    };

    // Rule order fixes the cell order: [0]=subscriber, [1]=number, [2]=amount, [3]=lock-up, [4]=price.
    var searchRules = new List<TableSearchTitleRule> { subscriberRule, numberRule, moneyRule, freezeRule, priceRule };
    var rows = HTMLTable.GetMultiInfoByTitleRules(root, searchRules, true);

    var increaseStocklist = new List<RecordBase>();
    foreach (var row in rows)
    {
        var increase = new IncreaseStockRec();
        increase.Id = SampleincreaseStock.Id;
        increase.BuyMethod = SampleincreaseStock.BuyMethod;

        increase.PublishTarget = row[0].RawData;
        if (String.IsNullOrEmpty(increase.PublishTarget))
        {
            continue;
        }
        increase.PublishTarget = increase.PublishTarget.NormalizeTextResult();

        increase.IncreaseNumber = row[1].RawData;
        bool hasNumber = !String.IsNullOrEmpty(increase.IncreaseNumber);
        if (hasNumber && increase.IncreaseNumber.Equals("0"))
        {
            continue;
        }
        if (hasNumber && increase.IncreaseNumber.Contains("|"))
        {
            // Several "|"-separated values: keep the last one.
            increase.IncreaseNumber = increase.IncreaseNumber.Split("|").Last();
        }

        increase.IncreaseMoney = row[2].RawData;
        bool hasMoney = !String.IsNullOrEmpty(increase.IncreaseMoney);
        if (hasMoney && increase.IncreaseMoney.Equals("0"))
        {
            continue;
        }
        if (hasMoney && increase.IncreaseMoney.Contains("|"))
        {
            increase.IncreaseMoney = increase.IncreaseMoney.Split("|").Last();
        }

        // Amount missing: compute price * number by hand.
        // NOTE(review): the computed amount is only logged, never stored on the record —
        // confirm whether it should be assigned to IncreaseMoney.
        double price;
        double number;
        if (String.IsNullOrEmpty(increase.IncreaseMoney)
            && !String.IsNullOrEmpty(increase.IncreaseNumber)
            && !String.IsNullOrEmpty(row[4].RawData)
            && double.TryParse(row[4].RawData, out price)
            && double.TryParse(increase.IncreaseNumber, out number))
        {
            double money = price * number;
            if (!Program.IsMultiThreadMode)
            {
                Program.Logger.WriteLine("通过计算获得金额:" + money.ToString());
            }
        }

        increase.FreezeYear = row[3].RawData;
        increaseStocklist.Add(increase);
    }
    return increaseStocklist;
}