/// <summary>
/// Extracts the name of party B (乙方) from the document.
/// </summary>
/// <remarks>
/// Tried in order: (1) the text following the label "供应商名称:";
/// (2) the text preceding "有限公司董事会" (last occurrence first), with "有限公司" re-appended;
/// (3) the last entry of <c>companynamelist</c>. Returns an empty string when nothing is found.
/// </remarks>
/// <param name="root">Parsed HTML document to search.</param>
/// <returns>The first candidate found, or <c>""</c>.</returns>
static string GetYiFang(HTMLEngine.MyRootHtmlNode root)
{
    // 1) Text that follows an explicit supplier label.
    var Extractor = new ExtractProperty();
    Extractor.LeadingWordList = new string[] { "供应商名称:" };
    Extractor.Extract(root);
    foreach (var item in Extractor.CandidateWord)
    {
        // Fixed: this message used to be labelled 甲方 (party A) although the value
        // returned here is the party-B candidate.
        Program.Logger.WriteLine("乙方候补词(关键字):[" + item + "]");
        return item;
    }

    // 2) Text that precedes the board signature "…有限公司董事会".
    Extractor = new ExtractProperty();
    Extractor.TrailingWordList = new string[] { "有限公司董事会" };
    Extractor.Extract(root);
    // Prefer the last occurrence in the document.
    Extractor.CandidateWord.Reverse();
    foreach (var item in Extractor.CandidateWord)
    {
        Program.Logger.WriteLine("乙方候补词(关键字):[" + item + "有限公司]");
        return item + "有限公司";
    }

    // 3) Fall back to the last recognised company name.
    if (companynamelist.Count > 0)
    {
        return companynamelist[companynamelist.Count - 1].secFullName;
    }
    return "";
}
/// <summary>
/// Finds the contract / winning-bid amount mentioned after one of the known amount keywords.
/// </summary>
/// <param name="root">Parsed HTML document to search.</param>
/// <returns>
/// The amount whose unit is "人民币" or "元" if there is one, otherwise the first amount found;
/// <c>""</c> when no amount could be extracted.
/// </returns>
static string GetMoney(HTMLEngine.MyRootHtmlNode root)
{
    var extractor = new EntityProperty
    {
        // Candidates are the texts that follow these keywords.
        LeadingWordList = new string[] { "中标金额", "中标价", "合同金额", "合同总价", "订单总金额" }
    };
    extractor.Extract(root);

    // Gather every (amount, unit) pair found in any candidate text.
    var amounts = new List<Tuple<String, String>>();
    foreach (var candidate in extractor.CandidateWord)
    {
        amounts.AddRange(Utility.SeekMoney(candidate));
    }
    if (amounts.Count == 0)
    {
        return "";
    }

    // Prefer an amount expressed in RMB / yuan.
    var picked = "";
    for (int i = 0; i < amounts.Count; i++)
    {
        var unit = amounts[i].Item2;
        if (unit == "人民币" || unit == "元")
        {
            picked = amounts[i].Item1;
            break;
        }
    }
    // Otherwise fall back to the first amount.
    if (picked == "")
    {
        picked = amounts[0].Item1;
    }

    Program.Logger.WriteLine("金额候补词:[" + picked + "]");
    return picked;
}
/// <summary>
/// Reads one record per subscriber from the document's tables.
/// </summary>
/// <param name="root">Parsed HTML document whose tables are searched.</param>
/// <param name="SampleincreaseStock">Template record supplying <c>id</c> and <c>BuyMethod</c>.</param>
/// <returns>One record per table row that has a non-empty subscriber name.</returns>
static List<struIncreaseStock> GetMultiTarget(HTMLEngine.MyRootHtmlNode root, struIncreaseStock SampleincreaseStock)
{
    // Column 0: subscriber; the header must equal one of the titles.
    // (Titles "投资者名称" and "股东名称" were noted by the author but are not enabled.)
    var buyerRule = new TableSearchRule
    {
        Name = "认购对象",
        Rule = new string[] { "发行对象", "认购对象", "发行对象名称" }.ToList(),
        IsEq = true
    };
    // Column 1: number of shares; the header only needs to contain a title.
    var shareCountRule = new TableSearchRule
    {
        Name = "增发数量",
        Rule = new string[] { "配售股数", "认购数量", "认购股份数" }.ToList(),
        IsEq = false,
        Normalize = Normalizer.NormalizerStockNumber
    };
    // Column 2: amount of money; the header only needs to contain a title.
    var amountRule = new TableSearchRule
    {
        Name = "增发金额",
        Rule = new string[] { "配售金额", "认购金额" }.ToList(),
        IsEq = false,
        Normalize = Normalizer.NormalizerMoney
    };
    // Column 3: lock-up period; the header only needs to contain a title.
    var lockupRule = new TableSearchRule
    {
        Name = "锁定期",
        Rule = new string[] { "锁定期", "限售期" }.ToList(),
        IsEq = false,
        Normalize = NormalizerFreezeYear
    };

    var rules = new List<TableSearchRule> { buyerRule, shareCountRule, amountRule, lockupRule };
    var rows = HTMLTable.GetMultiInfo(root, rules, true);

    var records = new List<struIncreaseStock>();
    foreach (var row in rows)
    {
        var target = row[0].RawData;
        // Rows without a subscriber name are not records.
        if (String.IsNullOrEmpty(target))
        {
            continue;
        }
        records.Add(new struIncreaseStock
        {
            id = SampleincreaseStock.id,
            BuyMethod = SampleincreaseStock.BuyMethod,
            PublishTarget = target,
            IncreaseNumber = row[1].RawData,
            IncreaseMoney = row[2].RawData,
            FreezeYear = row[3].RawData
        });
    }
    return records;
}
/// <summary>
/// Determines the subscription method by looking for the keyword "现金认购" in the document.
/// </summary>
/// <param name="root">Parsed HTML document to search.</param>
/// <returns>"现金" when the keyword occurs at least once, otherwise <c>""</c>.</returns>
static string getBuyMethod(HTMLEngine.MyRootHtmlNode root)
{
    // FindWordCnt returns a collection of hits; only how many there are matters here.
    var hits = EntityProperty.FindWordCnt("现金认购", root).Count;
    Program.Logger.WriteLine("现金认购(文本):" + hits);
    return hits > 0 ? "现金" : "";
}
/// <summary>
/// Determines the subscription method by counting occurrences of "现金认购" in the document.
/// </summary>
/// <param name="root">Parsed HTML document to search.</param>
/// <returns>"现金" when the keyword count is positive, otherwise <c>""</c>.</returns>
static string getBuyMethod(HTMLEngine.MyRootHtmlNode root)
{
    var occurrences = new ExtractProperty().FindWordCnt("现金认购", root);
    Program.Logger.WriteLine("现金认购(文本):" + occurrences);
    if (occurrences > 0)
    {
        return "现金";
    }
    return "";
}
/// <summary>
/// Subscription method: maps the keyword "现金认购" to "现金" and joins all mapped results.
/// </summary>
/// <param name="root">Not read by this method; extraction runs against <c>this</c>.</param>
/// <returns>The mapped results joined with <c>Utility.SplitChar</c>.</returns>
string getBuyMethod(HTMLEngine.MyRootHtmlNode root)
{
    var property = new EntityProperty();
    // Keyword to look for, and the value it maps to.
    property.KeyWordMap.Add("现金认购", "现金");
    property.Extract(this);

    var joined = string.Join(Utility.SplitChar, property.WordMapResult);
    // Logging is skipped in multi-thread mode.
    if (!Program.IsMultiThreadMode)
    {
        Program.Logger.WriteLine("认购方式:" + joined);
    }
    return joined;
}
/// <summary>
/// Collects every table row that has at least one cell matching the content rule.
/// </summary>
/// <remarks>
/// Spaces are removed from a cell before comparison. In equal mode the cell must equal
/// a keyword; in contains mode the cell must contain a keyword. Each row is added at
/// most once.
/// </remarks>
/// <param name="root">Parsed HTML document whose tables are searched.</param>
/// <param name="rule">Keywords (<c>Content</c>) and match mode (<c>IsContentEq</c>).</param>
/// <returns>The matching rows converted with <c>ConvertRowToCellInfo</c>; empty when nothing matches.</returns>
public static List<List<CellInfo>> GetMultiRowsByContentRule(HTMLEngine.MyRootHtmlNode root, TableSearchContentRule rule)
{
    var Container = new List<List<CellInfo>>();
    // Tables are addressed 1..Count.
    for (int tableNo = 1; tableNo <= root.TableList.Count; tableNo++)
    {
        var table = new HTMLTable(root.TableList[tableNo]);
        var RowHeader = table.GetRow(1);
        // Fixed: rows are 1-based, so the upper bound must be inclusive;
        // the previous "< RowCount" never inspected the last row.
        for (int RowNo = 1; RowNo <= table.RowCount; RowNo++)
        {
            var row = table.GetRow(RowNo);
            var IsMatch = false;
            foreach (var cell in row)
            {
                if (rule.Content == null)
                {
                    // No keywords: nothing in this row can match.
                    break;
                }
                foreach (var content in rule.Content)
                {
                    var text = cell.Replace(" ", "");
                    // Fixed: contains mode used to test content.Contains(cell), which is
                    // inverted and made every empty cell match every keyword.
                    IsMatch = rule.IsContentEq ? content.Equals(text) : text.Contains(content);
                    if (IsMatch)
                    {
                        break;
                    }
                }
                if (IsMatch)
                {
                    break;
                }
            }
            if (IsMatch)
            {
                Container.Add(ConvertRowToCellInfo(row, tableNo, RowNo, RowHeader));
            }
        }
    }
    return Container;
}
/// <summary>
/// Extracts an amount from the text following one of the known amount keywords.
/// </summary>
/// <param name="node">Parsed HTML document to search.</param>
/// <returns>
/// The amount parsed from the last candidate (every candidate is parsed and logged),
/// or <c>""</c> when there are no candidates.
/// </returns>
static string GetMoney(HTMLEngine.MyRootHtmlNode node)
{
    var extractor = new ExtractProperty
    {
        // Candidates are the texts that follow these keywords.
        LeadingWordList = new string[] { "中标金额", "中标价", "合同金额", "合同总价", "订单总金额" }
    };
    extractor.Extract(node);

    var latest = "";
    foreach (var candidate in extractor.CandidateWord)
    {
        // Each later candidate overwrites the earlier one.
        latest = Utility.SeekMoney(candidate, "");
        Program.Logger.WriteLine("金额候补词:[" + latest + "]");
    }
    return latest;
}
/// <summary>
/// Change end date: takes the first text found between "截止"/"截至" and "日" and normalizes it.
/// </summary>
/// <param name="root">Parsed HTML document to search.</param>
/// <returns>The normalized date of the first candidate, or <c>""</c> when there is none.</returns>
static string GetChangeEndDate(HTMLEngine.MyRootHtmlNode root)
{
    var extractor = new EntityProperty
    {
        StartEndFeature = Utility.GetStartEndStringArray(
            new string[] { "截止", "截至" },
            new string[] { "日" })
    };
    extractor.Extract(root);

    foreach (var candidate in extractor.CandidateWord)
    {
        Program.Logger.WriteLine("候补变动截止日期:[" + candidate + "]");
        // "日" is re-appended before normalizing; only the first candidate is used.
        return Normalizer.NormailizeDate(candidate + "日");
    }
    return "";
}
/// <summary>
/// Subscription method: maps the keyword "现金认购" to "现金".
/// </summary>
/// <param name="root">Not read by this method; extraction runs against <c>this</c>.</param>
/// <returns>The extractor's <c>WordMapResult</c> (possibly null or empty).</returns>
string getBuyMethod(HTMLEngine.MyRootHtmlNode root)
{
    var property = new EntityProperty();
    // Keyword to look for, and the value it maps to.
    property.KeyWordMap.Add("现金认购", "现金");
    property.Extract(this);

    var method = property.WordMapResult;
    // Log only a non-empty result, and never in multi-thread mode.
    var shouldLog = !String.IsNullOrEmpty(method) && !Program.IsMultiThreadMode;
    if (shouldLog)
    {
        Program.Logger.WriteLine("认购方式:" + method);
    }
    return method;
}
/// <summary>
/// Extracts the name of party B (乙方) from the document.
/// </summary>
/// <remarks>
/// Tried in order: (1) the trimmed text after "供应商名称:" or "乙方:";
/// (2) if some text precedes "有限公司董事会": the first sub-company in <c>companynamelist</c>,
/// else the last such text with "有限公司" re-appended;
/// (3) the last entry of <c>companynamelist</c>; (4) <c>""</c>.
/// </remarks>
/// <param name="root">Parsed HTML document to search.</param>
/// <returns>The party-B name, or <c>""</c> when nothing is found.</returns>
static string GetYiFang(HTMLEngine.MyRootHtmlNode root)
{
    // 1) Explicit labels. Other labels noted by the author but not enabled:
    //    "中标单位:", "中标人:", "乙方(供方):", "承包人:", "承包方:", "中标方:", "中标人名称:".
    var labelled = new EntityProperty
    {
        LeadingWordList = new string[] { "供应商名称:", "乙方:" }
    };
    labelled.Extract(root);
    foreach (var candidate in labelled.CandidateWord)
    {
        Program.Logger.WriteLine("乙方候补词(关键字):[" + candidate + "]");
        return candidate.Trim();
    }

    // 2) Text preceding the board signature "…有限公司董事会".
    var boardSigned = new EntityProperty
    {
        TrailingWordList = new string[] { "有限公司董事会" }
    };
    boardSigned.Extract(root);
    boardSigned.CandidateWord.Reverse();
    foreach (var candidate in boardSigned.CandidateWord)
    {
        // A sub-company, when one is known, takes priority over the signature text.
        for (int i = 0; i < companynamelist.Count; i++)
        {
            if (companynamelist[i].isSubCompany)
            {
                return companynamelist[i].secFullName;
            }
        }
        Program.Logger.WriteLine("乙方候补词(关键字):[" + candidate + "有限公司]");
        return candidate.Trim() + "有限公司";
    }

    // 3) Last recognised company name, else nothing.
    return companynamelist.Count > 0
        ? companynamelist[companynamelist.Count - 1].secFullName
        : "";
}
/// <summary>
/// Extracts the shareholder's full name from phrases such as "收到 … 的通知".
/// </summary>
/// <param name="root">Parsed HTML document to search.</param>
/// <returns>
/// The first candidate containing "简称" if any, otherwise the first candidate,
/// otherwise <c>""</c>.
/// </returns>
static string GetHolderFullName(HTMLEngine.MyRootHtmlNode root)
{
    var extractor = new EntityProperty
    {
        StartEndFeature = Utility.GetStartEndStringArray(
            new string[] { "接到", "收到", "股东" },
            new string[] { "的", "通知", "告知函", "减持", "增持", "《" })
    };
    extractor.Extract(root);

    var candidates = extractor.CandidateWord;
    // A candidate that also carries a short name ("简称") is preferred.
    for (int i = 0; i < candidates.Count; i++)
    {
        if (candidates[i].Contains("简称"))
        {
            Program.Logger.WriteLine("候补股东全称修正:[" + candidates[i] + "]");
            return candidates[i];
        }
    }
    return candidates.Count > 0 ? candidates[0] : "";
}
/// <summary>
/// Lists the consortium members: companies located in a paragraph that mentions "联合体".
/// </summary>
/// <param name="root">Parsed HTML document to search.</param>
/// <param name="YiFang">Party-B name, which is excluded from the result.</param>
/// <returns>Distinct member full names joined with "、"; <c>""</c> when there are none.</returns>
static string GetUnionMember(HTMLEngine.MyRootHtmlNode root, String YiFang)
{
    var members = new List<String>();
    // FindWordCnt yields the ids of the paragraphs containing the keyword.
    foreach (var paragraphId in EntityProperty.FindWordCnt("联合体", root))
    {
        foreach (var company in companynamelist)
        {
            if (company.positionId != paragraphId)
            {
                continue;
            }
            var fullName = company.secFullName;
            // Skip names already collected, and party B itself.
            if (members.Contains(fullName) || fullName.Equals(YiFang))
            {
                continue;
            }
            members.Add(fullName);
        }
    }
    return String.Join("、", members);
}
/// <summary>
/// Extracts, from every table whose header row satisfies the first (primary) rule,
/// one <c>CellInfo[]</c> per data row, with one slot per rule.
/// </summary>
/// <param name="root">Parsed HTML document whose tables are searched.</param>
/// <param name="Rules">Column rules; index 0 is the primary field and must be found.</param>
/// <param name="IsMeger">When true, the result is passed through <c>MergerMultiInfo</c>.</param>
/// <returns>The extracted rows with duplicates (per <c>IsSameContent</c>) removed.</returns>
public static List<CellInfo[]> GetMultiInfo(HTMLEngine.MyRootHtmlNode root, List<TableSearchRule> Rules, bool IsMeger)
{
    var collected = new List<CellInfo[]>();
    // Tables are addressed 1..Count.
    for (int tableId = 1; tableId <= root.TableList.Count; tableId++)
    {
        var table = new HTMLTable(root.TableList[tableId]);
        var header = table.GetHeaderRow();

        // columnOf[r] is the 1-based column matched by rule r; 0 means "not found".
        var columnOf = new int[Rules.Count];
        for (int ruleIdx = 0; ruleIdx < Rules.Count; ruleIdx++)
        {
            var keywords = Rules[ruleIdx].Rule;
            for (int col = 0; col < header.Length; col++)
            {
                bool hit = false;
                if (Rules[ruleIdx].IsEq)
                {
                    // Equal mode: the header cell must be one of the keywords.
                    hit = keywords.Contains(header[col]);
                }
                else
                {
                    // Contains mode: the header cell must contain a keyword.
                    foreach (var keyword in keywords)
                    {
                        if (header[col].Contains(keyword))
                        {
                            hit = true;
                            break;
                        }
                    }
                }
                if (hit)
                {
                    columnOf[ruleIdx] = col + 1;
                    break;
                }
            }
            // Without the primary field there is no point in looking for the others.
            if (columnOf[0] == 0)
            {
                break;
            }
        }
        // Primary field missing: move on to the next table.
        if (columnOf[0] == 0)
        {
            continue;
        }

        for (int rowNo = 2; rowNo <= table.RowCount; rowNo++)
        {
            // Skip total rows.
            if (table.IsTotalRow(rowNo))
            {
                continue;
            }
            // The primary field must hold a real value, not a placeholder or a repeated header.
            var key = table.CellValue(rowNo, columnOf[0]);
            bool placeholder = key == "" || key == "<rowspan>" || key == "<colspan>" || key == "<null>";
            if (placeholder || Rules[0].Rule.Contains(key))
            {
                continue;
            }

            var cells = new CellInfo[Rules.Count];
            for (int ruleIdx = 0; ruleIdx < Rules.Count; ruleIdx++)
            {
                int colNo = columnOf[ruleIdx];
                if (colNo == 0)
                {
                    continue;
                }
                cells[ruleIdx].TableId = tableId;
                cells[ruleIdx].Row = rowNo;
                cells[ruleIdx].Column = colNo;

                var raw = table.CellValue(rowNo, colNo);
                if (raw.Equals("<null>"))
                {
                    continue;
                }
                if (Rules[ruleIdx].Normalize != null)
                {
                    // The normalizer also receives the column's header text.
                    cells[ruleIdx].RawData = Rules[ruleIdx].Normalize(raw, header[colNo - 1]);
                }
                else
                {
                    cells[ruleIdx].RawData = raw;
                }
            }

            // Keep only rows not already collected.
            if (!collected.Exists(existing => IsSameContent(existing, cells)))
            {
                collected.Add(cells);
            }
        }
    }
    return IsMeger ? MergerMultiInfo(collected) : collected;
}
/// <summary>
/// Extracts shareholding-change records (holder, end date, price, number) from the
/// document's tables and attaches the holdings-after-change figures.
/// </summary>
/// <remarks>
/// Rows are processed last-to-first so that, for each holder, the after-change figures
/// are written onto the holder's last record only. Records lacking a holder name or an
/// end date are dropped. Among records with the same key, those without after-change
/// figures are dropped when another one has them.
/// </remarks>
/// <param name="root">Parsed HTML document whose tables are searched.</param>
/// <param name="id">Identifier stamped on every record.</param>
/// <returns>The surviving records; records whose <c>id</c> is null or empty are filtered out.</returns>
static List<struStockChange> ExtractFromTable(HTMLEngine.MyRootHtmlNode root, string id)
{
    // Primary column: holder name (the header must equal the title).
    var StockHolderRule = new TableSearchRule();
    StockHolderRule.Name = "股东全称";
    StockHolderRule.Rule = new string[] { "股东名称" }.ToList();
    StockHolderRule.IsEq = true;

    // Change period / date (the header only needs to contain a title).
    var ChangeDateRule = new TableSearchRule();
    ChangeDateRule.Name = "变动截止日期";
    ChangeDateRule.Rule = new string[] { "减持期间", "增持期间", "减持时间", "增持时间" }.ToList();
    ChangeDateRule.IsEq = false;
    ChangeDateRule.Normalize = Normalizer.NormailizeDate;

    // Average price: keep only the text before "元".
    var ChangePriceRule = new TableSearchRule();
    ChangePriceRule.Name = "变动价格";
    ChangePriceRule.Rule = new string[] { "减持均价", "增持均价" }.ToList();
    ChangePriceRule.IsEq = false;
    ChangePriceRule.Normalize = (x, y) =>
    {
        if (x.Contains("元"))
        {
            return Utility.GetStringBefore(x, "元");
        }
        return x;
    };

    // Number of shares changed.
    var ChangeNumberRule = new TableSearchRule();
    ChangeNumberRule.Name = "变动数量";
    ChangeNumberRule.Rule = new string[] { "减持股数", "增持股数" }.ToList();
    ChangeNumberRule.IsEq = false;
    ChangeNumberRule.Normalize = Normalizer.NormalizerStockNumber;

    var Rules = new List<TableSearchRule>();
    Rules.Add(StockHolderRule);
    Rules.Add(ChangeDateRule);
    Rules.Add(ChangePriceRule);
    Rules.Add(ChangeNumberRule);
    var result = HTMLTable.GetMultiInfo(root, Rules, false);

    // After-change figures go onto the last record of each holder only,
    // so walk the rows from last to first.
    result.Reverse();

    // Fixed: fetch the holdings once. The list used to be re-fetched for every record,
    // so the Used flag set below never carried over to the next record.
    var holderafterlist = GetHolderAfter(root);

    var stockchangelist = new List<struStockChange>();
    foreach (var rec in result)
    {
        var stockchange = new struStockChange();
        stockchange.id = id;
        var Name = NormalizeCompanyName(rec[0].RawData);
        stockchange.HolderFullName = Name.Item1;
        stockchange.HolderShortName = Name.Item2;
        stockchange.ChangeEndDate = rec[1].RawData;
        stockchange.ChangePrice = rec[2].RawData;
        stockchange.ChangeNumber = rec[3].RawData;

        // Practically every valid record has a holder name and an end date. Dropping the
        // rest may lose a few genuine records but helps the overall metric. This check runs
        // before the lookup so a discarded record cannot consume a holding entry.
        if (string.IsNullOrEmpty(stockchange.HolderFullName) || string.IsNullOrEmpty(stockchange.ChangeEndDate))
        {
            continue;
        }

        for (int i = 0; i < holderafterlist.Count; i++)
        {
            var after = holderafterlist[i];
            if (after.Used)
            {
                continue;
            }
            if (after.Name == stockchange.HolderFullName || after.Name == stockchange.HolderShortName)
            {
                stockchange.HoldNumberAfterChange = after.Count;
                stockchange.HoldPercentAfterChange = after.Percent;
                after.Used = true;
                // Write back so the flag is kept even if the element is a value type.
                holderafterlist[i] = after;
                break;
            }
        }
        stockchangelist.Add(stockchange);
    }

    // Merge: for two records with the same key, blank the id of the one without
    // after-change figures so the final filter removes it.
    for (int i = 0; i < stockchangelist.Count; i++)
    {
        var x = stockchangelist[i];
        for (int j = i + 1; j < stockchangelist.Count; j++)
        {
            var y = stockchangelist[j];
            if (x.GetKey() == y.GetKey())
            {
                if (string.IsNullOrEmpty(x.HoldNumberAfterChange) && !string.IsNullOrEmpty(y.HoldNumberAfterChange))
                {
                    x.id = "";
                    // Fixed: struStockChange is a value type, so the change must be stored
                    // back into the list; it used to be lost on the local copy.
                    stockchangelist[i] = x;
                }
                if (!string.IsNullOrEmpty(x.HoldNumberAfterChange) && string.IsNullOrEmpty(y.HoldNumberAfterChange))
                {
                    y.id = "";
                    stockchangelist[j] = y;
                }
            }
        }
    }
    return stockchangelist.Where(x => !String.IsNullOrEmpty(x.id)).ToList();
}
/// <summary>
/// Reads one issuance record per subscriber from the document's tables.
/// </summary>
/// <remarks>
/// Rows with an empty subscriber, or whose share count or amount is exactly "0", are
/// skipped. A value containing "|" is reduced to its last "|"-separated part.
/// </remarks>
/// <param name="root">Parsed HTML document whose tables are searched.</param>
/// <param name="SampleincreaseStock">Template record supplying <c>Id</c> and <c>BuyMethod</c>.</param>
/// <returns>The extracted records.</returns>
List<RecordBase> GetMultiTarget(HTMLEngine.MyRootHtmlNode root, IncreaseStockRec SampleincreaseStock)
{
    // Column 0 (required): subscriber; the header must equal a title.
    // (Titles "投资者名称" and "股东名称" were noted by the author but are not enabled.)
    var targetRule = new TableSearchTitleRule
    {
        Name = "认购对象",
        Title = new string[] { "发行对象", "认购对象", "发行对象名称" }.ToList(),
        IsTitleEq = true,
        IsRequire = true
    };
    // Column 1: number of shares; the header only needs to contain a title.
    var numberRule = new TableSearchTitleRule
    {
        Name = "增发数量",
        Title = new string[] { "配售股数", "认购数量", "认购股数", "认购股份数", "发行股份数", "配售数量" }.ToList(),
        IsTitleEq = false,
        Normalize = NumberUtility.NormalizerStockNumber
    };
    // Column 2: amount; the header only needs to contain a title.
    var moneyRule = new TableSearchTitleRule
    {
        Name = "增发金额",
        Title = new string[] { "配售金额", "认购金额", "获配金额" }.ToList(),
        IsTitleEq = false,
        Normalize = MoneyUtility.Format
    };
    // Column 3: lock-up period; the header only needs to contain a title.
    var lockupRule = new TableSearchTitleRule
    {
        Name = "锁定期",
        Title = new string[] { "锁定期", "限售期" }.ToList(),
        IsTitleEq = false,
        Normalize = NormalizerFreezeYear
    };
    // Column 4: price per share; the header only needs to contain a title.
    var priceRule = new TableSearchTitleRule
    {
        Name = "价格",
        Title = new string[] { "认购价格", "配售价格", "申购报价" }.ToList(),
        IsTitleEq = false,
        Normalize = MoneyUtility.Format
    };

    var rules = new List<TableSearchTitleRule> { targetRule, numberRule, moneyRule, lockupRule, priceRule };
    var rows = HTMLTable.GetMultiInfoByTitleRules(root, rules, true);

    var records = new List<RecordBase>();
    foreach (var row in rows)
    {
        var increase = new IncreaseStockRec();
        increase.Id = SampleincreaseStock.Id;
        increase.BuyMethod = SampleincreaseStock.BuyMethod;

        increase.PublishTarget = row[0].RawData;
        if (String.IsNullOrEmpty(increase.PublishTarget))
        {
            continue;
        }
        increase.PublishTarget = increase.PublishTarget.NormalizeTextResult();

        increase.IncreaseNumber = row[1].RawData;
        if (increase.IncreaseNumber == "0")
        {
            continue;
        }
        if (increase.IncreaseNumber != null && increase.IncreaseNumber.Contains("|"))
        {
            increase.IncreaseNumber = increase.IncreaseNumber.Split("|").Last();
        }

        increase.IncreaseMoney = row[2].RawData;
        if (increase.IncreaseMoney == "0")
        {
            continue;
        }
        if (increase.IncreaseMoney != null && increase.IncreaseMoney.Contains("|"))
        {
            increase.IncreaseMoney = increase.IncreaseMoney.Split("|").Last();
        }

        // Amount missing: compute price * number by hand.
        // NOTE(review): the computed value is only logged, never stored in the record - confirm this is intended.
        double price;
        double number;
        if (String.IsNullOrEmpty(increase.IncreaseMoney)
            && !String.IsNullOrEmpty(increase.IncreaseNumber)
            && !String.IsNullOrEmpty(row[4].RawData)
            && double.TryParse(row[4].RawData, out price)
            && double.TryParse(increase.IncreaseNumber, out number))
        {
            double money = price * number;
            if (!Program.IsMultiThreadMode)
            {
                Program.Logger.WriteLine("通过计算获得金额:" + money.ToString());
            }
        }

        increase.FreezeYear = row[3].RawData;
        records.Add(increase);
    }
    return records;
}
/// <summary>
/// Extracts shareholding-change records (holder, end date, price, number) from the
/// document's tables and attaches the holdings-after-change figures.
/// </summary>
/// <remarks>
/// Rows are processed last-to-first so that each holding entry is consumed by the
/// holder's last record only.
/// </remarks>
/// <param name="root">Parsed HTML document whose tables are searched.</param>
/// <param name="id">Identifier stamped on every record.</param>
/// <returns>One record per extracted table row, in reversed table order.</returns>
static List<struStockChange> ExtractFromTable(HTMLEngine.MyRootHtmlNode root, string id)
{
    // Primary column: holder name (the header must equal the title).
    var StockHolderRule = new TableSearchRule();
    StockHolderRule.Name = "股东全称";
    StockHolderRule.Rule = new string[] { "股东名称" }.ToList();
    StockHolderRule.IsEq = true;

    // Change period / date (the header only needs to contain a title).
    var ChangeDateRule = new TableSearchRule();
    ChangeDateRule.Name = "变动截止日期";
    ChangeDateRule.Rule = new string[] { "减持期间", "增持期间", "减持时间", "增持时间" }.ToList();
    ChangeDateRule.IsEq = false;
    ChangeDateRule.Normalize = Normalizer.NormailizeDate;

    // Average price: keep only the text before "元".
    var ChangePriceRule = new TableSearchRule();
    ChangePriceRule.Name = "变动价格";
    ChangePriceRule.Rule = new string[] { "减持均价", "增持均价" }.ToList();
    ChangePriceRule.IsEq = false;
    ChangePriceRule.Normalize = (x, y) =>
    {
        if (x.Contains("元"))
        {
            return Utility.GetStringBefore(x, "元");
        }
        return x;
    };

    // Number of shares changed.
    var ChangeNumberRule = new TableSearchRule();
    ChangeNumberRule.Name = "变动数量";
    ChangeNumberRule.Rule = new string[] { "减持股数", "增持股数" }.ToList();
    ChangeNumberRule.IsEq = false;
    ChangeNumberRule.Normalize = Normalizer.NormalizerStockNumber;

    var Rules = new List<TableSearchRule>();
    Rules.Add(StockHolderRule);
    Rules.Add(ChangeDateRule);
    Rules.Add(ChangePriceRule);
    Rules.Add(ChangeNumberRule);
    var result = HTMLTable.GetMultiInfo(root, Rules, false);

    // After-change figures go onto the last record of each holder only,
    // so walk the rows from last to first.
    result.Reverse();

    // Fixed: fetch the holdings once. The list used to be re-fetched for every record,
    // so the Used flag set below never carried over to the next record.
    var holderafterlist = GetHolderAfter(root);

    var stockchangelist = new List<struStockChange>();
    foreach (var rec in result)
    {
        var stockchange = new struStockChange();
        stockchange.id = id;
        var Name = NormalizeCompanyName(rec[0].RawData);
        stockchange.HolderFullName = Name.Item1;
        stockchange.HolderShortName = Name.Item2;
        stockchange.ChangeEndDate = rec[1].RawData;
        stockchange.ChangePrice = rec[2].RawData;
        stockchange.ChangeNumber = rec[3].RawData;

        for (int i = 0; i < holderafterlist.Count; i++)
        {
            var after = holderafterlist[i];
            if (after.Used)
            {
                continue;
            }
            if (after.Name == stockchange.HolderFullName || after.Name == stockchange.HolderShortName)
            {
                stockchange.HoldNumberAfterChange = after.Count;
                stockchange.HoldPercentAfterChange = after.Percent;
                after.Used = true;
                // Write back so the flag is kept even if the element is a value type.
                holderafterlist[i] = after;
                break;
            }
        }
        stockchangelist.Add(stockchange);
    }
    return stockchangelist;
}
/// <summary>
/// Extracts shareholding-change records from the document's tables, cleans each field,
/// and attaches the holdings-after-change figures to each holder's latest record.
/// </summary>
/// <remarks>
/// If no table has a holder-name column, the tables are searched again with the date
/// column as the required field and the holder name is taken from <c>GetHolderName</c>.
/// Records without a holder name or end date, or with a number or price of "0", are dropped.
/// </remarks>
/// <param name="root">Parsed HTML document whose tables are searched.</param>
/// <param name="id">Identifier stamped on every record.</param>
/// <returns>The cleaned records; empty when nothing usable was found.</returns>
List<struStockChange> ExtractFromTable(HTMLEngine.MyRootHtmlNode root, string id)
{
    // Primary column (required): holder name; the header must equal a title.
    var StockHolderRule = new TableSearchRule();
    StockHolderRule.Name = "股东全称";
    StockHolderRule.Title = new string[] { "股东名称", "名称", "增持主体", "增持人", "减持主体", "减持人" }.ToList();
    StockHolderRule.IsTitleEq = true;
    StockHolderRule.IsRequire = true;

    // Change period / date; the header only needs to contain a title.
    var ChangeDateRule = new TableSearchRule();
    ChangeDateRule.Name = "变动截止日期";
    ChangeDateRule.Title = new string[] { "买卖时间", "日期", "减持期间", "增持期间", "减持股份期间", "增持股份期间", "减持时间", "增持时间", "减持股份时间", "增持股份时间" }.ToList();
    ChangeDateRule.IsTitleEq = false;
    ChangeDateRule.Normalize = NormailizeEndChangeDate;

    // Price: keep only the text before "元".
    var ChangePriceRule = new TableSearchRule();
    ChangePriceRule.Name = "变动价格";
    ChangePriceRule.Title = new string[] { "成交均价", "减持价格", "增持价格", "减持均", "增持均" }.ToList();
    ChangePriceRule.IsTitleEq = false;
    ChangePriceRule.Normalize = (x, y) =>
    {
        if (x.Contains("元"))
        {
            return Utility.GetStringBefore(x, "元");
        }
        return x;
    };

    // Number of shares changed.
    var ChangeNumberRule = new TableSearchRule();
    ChangeNumberRule.Name = "变动数量";
    ChangeNumberRule.Title = new string[] { "成交数量", "减持股数", "增持股数", "减持数量", "增持数量" }.ToList();
    ChangeNumberRule.IsTitleEq = false;
    ChangeNumberRule.Normalize = NumberUtility.NormalizerStockNumber;

    var Rules = new List<TableSearchRule>();
    Rules.Add(StockHolderRule);
    Rules.Add(ChangeDateRule);
    Rules.Add(ChangePriceRule);
    Rules.Add(ChangeNumberRule);
    var result = HTMLTable.GetMultiInfo(root, Rules, false);

    if (result.Count == 0)
    {
        // Nothing extracted: retry without the holder column, requiring the date column instead.
        Rules.Clear();
        ChangeDateRule.IsRequire = true;
        Rules.Add(ChangeDateRule);
        Rules.Add(ChangePriceRule);
        Rules.Add(ChangeNumberRule);
        result = HTMLTable.GetMultiInfo(root, Rules, false);
        if (result.Count == 0)
        {
            return new List<struStockChange>();
        }

        var NewResult = new List<CellInfo[]>();
        var Name = GetHolderName(this.root);
        if (String.IsNullOrEmpty(Name.FullName) && String.IsNullOrEmpty(Name.ShortName))
        {
            return new List<struStockChange>();
        }
        // Prepend a synthetic holder cell so the rows have the same four-slot layout as above.
        foreach (var item in result)
        {
            NewResult.Add(new CellInfo[]
            {
                new CellInfo() { RawData = String.IsNullOrEmpty(Name.FullName) ? Name.ShortName : Name.FullName },
                item[0],
                item[1],
                item[2]
            });
        }
        result = NewResult;
    }

    var holderafterlist = GetHolderAfter();
    var stockchangelist = new List<struStockChange>();
    foreach (var rec in result)
    {
        var stockchange = new struStockChange();
        stockchange.id = id;
        var ModifyName = rec[0].RawData;

        // A long name in the table may have been cut by a page break;
        // use the holdings table to recover the complete name.
        if (!holderafterlist.Select((z) => { return z.Name; }).ToList().Contains(ModifyName))
        {
            foreach (var item in holderafterlist)
            {
                if (item.Name.EndsWith("先生"))
                {
                    break; // special case kept from the original; no stated rationale
                }
                if (item.Name.StartsWith(ModifyName) && !item.Name.Equals(ModifyName))
                {
                    ModifyName = item.Name;
                    break;
                }
                if (item.Name.EndsWith(ModifyName) && !item.Name.Equals(ModifyName))
                {
                    ModifyName = item.Name;
                    break;
                }
            }
        }

        var Name = CompanyNameLogic.NormalizeCompanyName(this, ModifyName);
        stockchange.HolderFullName = Name.FullName.NormalizeTextResult();
        stockchange.HolderShortName = Name.ShortName;
        if (stockchange.HolderFullName.Contains("简称"))
        {
            // The full name still embeds the short name: split the two apart.
            stockchange.HolderShortName = Utility.GetStringAfter(stockchange.HolderFullName, "简称");
            stockchange.HolderShortName = stockchange.HolderShortName.Replace(")", String.Empty).Replace("“", String.Empty).Replace("”", String.Empty);
            stockchange.HolderFullName = Utility.GetStringBefore(stockchange.HolderFullName, "(");
        }

        stockchange.ChangeEndDate = rec[1].RawData;
        DateTime x;
        if (!DateTime.TryParse(stockchange.ChangeEndDate, out x))
        {
            // Unparseable date: blank it, except in debug mode where it is kept for inspection.
            if (!Program.IsDebugMode)
            {
                stockchange.ChangeEndDate = String.Empty;
            }
        }

        if (!String.IsNullOrEmpty(rec[2].RawData))
        {
            // Price ranges are discarded; only a single price is kept.
            if (!(rec[2].RawData.Contains("-") || rec[2].RawData.Contains("~") || rec[2].RawData.Contains("至")))
            {
                stockchange.ChangePrice = rec[2].RawData.Replace(" ", String.Empty);
                stockchange.ChangePrice = stockchange.ChangePrice.NormalizeNumberResult();
            }
        }
        if (!RegularTool.IsUnsign(stockchange.ChangePrice))
        {
            if (!String.IsNullOrEmpty(stockchange.ChangePrice))
            {
                Console.WriteLine("Error ChangePrice:[" + stockchange.ChangePrice + "]");
            }
            stockchange.ChangePrice = String.Empty;
        }

        if (!String.IsNullOrEmpty(rec[3].RawData))
        {
            stockchange.ChangeNumber = rec[3].RawData.Replace(" ", String.Empty);
            stockchange.ChangeNumber = stockchange.ChangeNumber.NormalizeNumberResult();
            if (!RegularTool.IsUnsign(stockchange.ChangeNumber))
            {
                if (!String.IsNullOrEmpty(stockchange.ChangeNumber))
                {
                    Console.WriteLine("Error ChangeNumber:[" + stockchange.ChangeNumber + "]");
                }
                stockchange.ChangeNumber = String.Empty;
            }
        }

        // Practically every valid record has a holder name and an end date. Dropping the
        // rest may lose a few genuine records but helps the overall metric.
        if (string.IsNullOrEmpty(stockchange.HolderFullName) || string.IsNullOrEmpty(stockchange.ChangeEndDate))
        {
            continue;
        }
        if (stockchange.ChangeNumber == "0" || stockchange.ChangePrice == "0")
        {
            continue;
        }
        stockchangelist.Add(stockchange);
    }

    // For every distinct holder, attach the after-change holdings to the record
    // with the greatest end date.
    var namelist = stockchangelist.Select(x => x.HolderFullName).Distinct().ToList();
    var newRec = new List<struStockChange>();
    foreach (var name in namelist)
    {
        var stocklist = stockchangelist.Where((x) => { return x.HolderFullName == name; }).ToList();
        stocklist.Sort((x, y) => { return x.ChangeEndDate.CompareTo(y.ChangeEndDate); });
        var last = stocklist.Last();
        // Fixed: attach at most once per holder. Two matching holding entries used to
        // append the same record to newRec twice.
        var attached = false;
        for (int i = 0; i < holderafterlist.Count; i++)
        {
            var after = holderafterlist[i];
            // Name normalization still runs for every entry, as before.
            after.Name = after.Name.Replace(" ", "");
            if (attached)
            {
                continue;
            }
            if (after.Name == last.HolderFullName || after.Name == last.HolderShortName)
            {
                // The record is a struct and cannot be edited in place: remove it and re-add the copy.
                stockchangelist.Remove(last);
                last.HoldNumberAfterChange = after.Count;
                last.HoldPercentAfterChange = after.Percent;
                newRec.Add(last);
                attached = true;
            }
        }
    }

    if (holderafterlist.Count != namelist.Count)
    {
        // The holder counts differ between the two sources; flag it for manual checking.
        if (!Program.IsMultiThreadMode)
        {
            Program.Logger.WriteLine("增持者数量确认!");
        }
    }
    stockchangelist.AddRange(newRec);
    return stockchangelist;
}
/// <summary>
/// Extracts table rows by header title, honouring the priority order of each rule's titles.
/// </summary>
/// <remarks>
/// For every table, candidate header rows are tried top-down (fully merged rows are
/// skipped). A row is accepted as the header when every required rule found a column;
/// if it is neither the first row nor preceded by a fully merged first row, every rule
/// must have found a column. Data rows below the header are then read, skipping total
/// rows, placeholder cells and repeated headers.
/// </remarks>
/// <param name="root">Parsed HTML document whose tables are searched.</param>
/// <param name="Rules">Column rules; index 0 is the primary field.</param>
/// <param name="IsMeger">When true, the result is passed through <c>MergerMultiInfo</c>.</param>
/// <returns>One <c>CellInfo[]</c> per data row (one slot per rule), duplicates removed.</returns>
public static List<CellInfo[]> GetMultiInfoByTitleRules(HTMLEngine.MyRootHtmlNode root, List<TableSearchTitleRule> Rules, bool IsMeger)
{
    var Container = new List<CellInfo[]>();
    for (int tableIndex = 0; tableIndex < root.TableList.Count; tableIndex++)
    {
        // Tables are addressed 1..Count.
        var table = new HTMLTable(root.TableList[tableIndex + 1]);
        var checkResultColumnNo = new int[Rules.Count];
        var checkResultTitle = new string[Rules.Count];
        var HeaderRowNo = -1;
        String[] HeaderRow = null;
        var IsFirstRowOneCell = false; // whether the first row is one fully merged cell

        for (int TestRowHeader = 1; TestRowHeader < table.RowCount; TestRowHeader++)
        {
            checkResultColumnNo = new int[Rules.Count];
            // Fixed: reset the matched titles together with the matched columns. A title
            // left over from a rejected candidate row used to end the title-priority
            // search for that rule after its first title.
            checkResultTitle = new string[Rules.Count];

            // A row whose cells all equal the first cell is a fully merged row, not a header.
            var IsOneColumnRow = true;
            for (int i = 2; i <= table.ColumnCount; i++)
            {
                if (table.CellValue(TestRowHeader, i) != (table.CellValue(TestRowHeader, 1)))
                {
                    IsOneColumnRow = false;
                    break;
                }
            }
            if (IsOneColumnRow)
            {
                if (TestRowHeader == 1)
                {
                    IsFirstRowOneCell = true;
                }
                continue;
            }

            HeaderRow = table.GetRow(TestRowHeader);
            for (int checkItemIdx = 0; checkItemIdx < Rules.Count; checkItemIdx++)
            {
                // Titles are tried in priority order; each title scans all columns.
                foreach (var EvaluateTitle in Rules[checkItemIdx].Title)
                {
                    for (int ColIndex = 0; ColIndex < HeaderRow.Length; ColIndex++)
                    {
                        // Title test: equal or contains, ignoring spaces in the header cell.
                        var headerText = HeaderRow[ColIndex].Replace(" ", "");
                        bool titleMatches = Rules[checkItemIdx].IsTitleEq
                            ? EvaluateTitle.Equals(headerText)
                            : headerText.Contains(EvaluateTitle);
                        if (!titleMatches)
                        {
                            continue;
                        }

                        // Reject headers containing any excluded word.
                        if (Rules[checkItemIdx].ExcludeTitle != null)
                        {
                            var isOK = true;
                            foreach (var word in Rules[checkItemIdx].ExcludeTitle)
                            {
                                if (HeaderRow[ColIndex].Contains(word))
                                {
                                    isOK = false;
                                    break;
                                }
                            }
                            if (!isOK)
                            {
                                continue;
                            }
                        }

                        // Parent title: some row above, in the same column, must match.
                        if (Rules[checkItemIdx].SuperTitle != null && Rules[checkItemIdx].SuperTitle.Count != 0)
                        {
                            var IsFoundSuperTitle = false;
                            for (int superRowNo = 1; superRowNo < TestRowHeader; superRowNo++)
                            {
                                var value = table.CellValue(superRowNo, ColIndex + 1).Replace(" ", "");
                                if (Rules[checkItemIdx].IsSuperTitleEq)
                                {
                                    // Equal mode.
                                    if (Rules[checkItemIdx].SuperTitle.Contains(value))
                                    {
                                        IsFoundSuperTitle = true;
                                        break;
                                    }
                                }
                                else
                                {
                                    // Contains mode.
                                    foreach (var supertitle in Rules[checkItemIdx].SuperTitle)
                                    {
                                        if (value.Contains(supertitle))
                                        {
                                            IsFoundSuperTitle = true;
                                            break;
                                        }
                                    }
                                }
                                if (IsFoundSuperTitle)
                                {
                                    break;
                                }
                            }
                            if (!IsFoundSuperTitle)
                            {
                                continue;
                            }
                        }

                        checkResultTitle[checkItemIdx] = HeaderRow[ColIndex];
                        checkResultColumnNo[checkItemIdx] = ColIndex + 1;
                        break;
                    }
                    // A higher-priority title matched: do not try the lower-priority ones.
                    if (!String.IsNullOrEmpty(checkResultTitle[checkItemIdx]))
                    {
                        break;
                    }
                }
                // Without the primary field there is no point in looking for the others.
                if (checkResultColumnNo[0] == 0)
                {
                    break;
                }
            }

            bool IsAllRequiredItemOK = true;
            for (int checkItemIdx = 0; checkItemIdx < checkResultColumnNo.Length; checkItemIdx++)
            {
                if (checkResultColumnNo[checkItemIdx] == 0 && Rules[checkItemIdx].IsRequire)
                {
                    IsAllRequiredItemOK = false;
                    break;
                }
            }
            if (IsAllRequiredItemOK)
            {
                if (TestRowHeader == 1 || IsFirstRowOneCell)
                {
                    HeaderRowNo = TestRowHeader;
                    break;
                }
                else
                {
                    // The header is not the first row and the first row is not one big
                    // merged cell: be strict and require every rule to have matched.
                    var IsOK = true;
                    for (int i = 0; i < Rules.Count; i++)
                    {
                        if (checkResultColumnNo[i] == 0)
                        {
                            IsOK = false;
                            break;
                        }
                    }
                    if (IsOK)
                    {
                        HeaderRowNo = TestRowHeader;
                        break;
                    }
                }
            }
        }

        // No header row found: move on to the next table.
        if (HeaderRowNo == -1)
        {
            continue;
        }

        // Data rows start right below the header row.
        for (int RowNo = HeaderRowNo + 1; RowNo <= table.RowCount; RowNo++)
        {
            // Skip total rows.
            if (table.IsTotalRow(RowNo))
            {
                continue;
            }
            // The primary field must hold a real value, not a placeholder or a repeated header.
            var target = table.CellValue(RowNo, checkResultColumnNo[0]);
            if (target == String.Empty || target == strRowSpanValue || target == strColSpanValue || target == strNullValue)
            {
                continue;
            }
            if (Rules[0].Title.Contains(target))
            {
                continue;
            }

            var RowData = new CellInfo[Rules.Count];
            for (int checkItemIdx = 0; checkItemIdx < Rules.Count; checkItemIdx++)
            {
                if (checkResultColumnNo[checkItemIdx] == 0)
                {
                    continue;
                }
                var ColNo = checkResultColumnNo[checkItemIdx];
                RowData[checkItemIdx].TableId = tableIndex + 1;
                RowData[checkItemIdx].Row = RowNo;
                RowData[checkItemIdx].Column = ColNo;
                RowData[checkItemIdx].Title = checkResultTitle[checkItemIdx];
                if (table.CellValue(RowNo, ColNo).Equals(strNullValue))
                {
                    continue;
                }
                RowData[checkItemIdx].RawData = table.CellValue(RowNo, ColNo);
                if (Rules[checkItemIdx].Normalize != null)
                {
                    // The normalizer also receives the column's header text.
                    RowData[checkItemIdx].RawData = Rules[checkItemIdx].Normalize(RowData[checkItemIdx].RawData, HeaderRow[ColNo - 1]);
                }
            }

            // Keep only rows not already collected.
            var HasSame = false;
            foreach (var existRow in Container)
            {
                if (IsSameContent(existRow, RowData))
                {
                    HasSame = true;
                    break;
                }
            }
            if (!HasSame)
            {
                Container.Add(RowData);
            }
        }
    }

    if (IsMeger)
    {
        Container = MergerMultiInfo(Container);
    }
    return Container;
}