/// <summary>
/// Rebuilds a <c>struStockChange</c> from one tab-separated text line.
/// </summary>
/// <param name="str">
/// A line whose tab-separated fields are, in order: id, holder full name, holder short name,
/// change end date, change price, change number, holding number after change and
/// holding percent after change.
/// </param>
/// <returns>The populated record; optional fields that are absent from the line are left unset.</returns>
/// <remarks>
/// The first three fields are read unconditionally, so a line with fewer than three fields
/// makes the array indexing throw. Fields four to eight are optional.
/// </remarks>
internal static struStockChange ConvertFromString(string str)
{
    var fields = str.Split("\t");
    var c = new struStockChange();

    // Mandatory columns.
    c.id = fields[0];
    c.HolderFullName = fields[1];
    c.HolderShortName = fields[2];

    // Optional columns: each one is taken only when the line is long enough to contain it.
    if (fields.Length > 3)
    {
        c.ChangeEndDate = fields[3];
    }
    if (fields.Length > 4)
    {
        c.ChangePrice = fields[4];
    }
    if (fields.Length > 5)
    {
        c.ChangeNumber = fields[5];
    }
    if (fields.Length > 6)
    {
        c.HoldNumberAfterChange = fields[6];
    }
    // Was "== 8": a line with more than eight fields (e.g. a trailing tab) lost this value
    // even though index 7 was present. Use the same "longer than" test as the other columns.
    if (fields.Length > 7)
    {
        c.HoldPercentAfterChange = fields[7];
    }
    return c;
}
/// <summary>
/// Extracts stock-change records from one announcement HTML file.
/// </summary>
/// <param name="htmlFileName">Path of the HTML file; its file name without ".html" is used as the announcement id.</param>
/// <returns>
/// The records found by <c>ExtractFromTable</c> when there are any; otherwise a list holding a single
/// record built from the holder name and change end date looked up in the parsed document.
/// </returns>
public static List<struStockChange> Extract(string htmlFileName)
{
    var fileInfo = new System.IO.FileInfo(htmlFileName);
    Program.Logger.WriteLine("Start FileName:[" + fileInfo.Name + "]");

    var node = HTMLEngine.Anlayze(htmlFileName);
    companynamelist = BussinessLogic.GetCompanyNameByCutWord(node);

    // The announcement id is the file name with ".html" stripped.
    var announcementId = fileInfo.Name.Replace(".html", "");

    // Table extraction is tried first; any hit is returned as-is.
    var records = ExtractFromTable(node, announcementId);
    if (records.Count > 0)
    {
        return records;
    }

    // Fallback: build one record from the document itself.
    var fallback = new struStockChange();
    fallback.id = announcementId;
    Program.Logger.WriteLine("公告ID:" + fallback.id);

    var holderName = NormalizeCompanyName(GetHolderFullName(node));
    fallback.HolderFullName = holderName.Item1;
    fallback.HolderShortName = holderName.Item2;
    fallback.ChangeEndDate = GetChangeEndDate(node);

    records.Add(fallback);
    return records;
}
/// <summary>
/// Extracts stock-change records for the current announcement.
/// </summary>
/// <returns>
/// The table-based records when <c>ExtractFromTable()</c> finds any; otherwise a list with at most
/// one record assembled from the holder name and the change end date. That record is only kept
/// when both the full name and the end date are non-empty and the full name contains neither
/// "增持" nor "减持".
/// </returns>
public List<struStockChange> Extract()
{
    // The result is not used here; the call is kept because it may have side effects.
    LocateDateRange(root);

    var holder = GetHolderName();
    bool holderIsComplete = !String.IsNullOrEmpty(holder.FullName) && !String.IsNullOrEmpty(holder.ShortName);
    if (holderIsComplete)
    {
        companynamelist.Add(new struCompanyName()
        {
            secFullName = holder.FullName,
            secShortName = holder.ShortName
        });
    }

    // An alternative, ExtractFromTableByContent(), was disabled in the original.
    var records = ExtractFromTable();
    if (records.Count > 0)
    {
        // Returning directly here slightly improves the score because of recall and similar factors.
        return records;
    }

    var stockchange = new struStockChange();
    // Announcement id.
    stockchange.id = Id;
    // (disabled) if (!Program.IsMultiThreadMode) Program.Logger.WriteLine("公告ID:" + stockchange.id);

    stockchange.HolderFullName = holder.FullName.NormalizeTextResult();
    int trimmedLength = EntityWordAnlayzeTool.TrimEnglish(stockchange.HolderFullName).Length;
    if (trimmedLength > ContractTraning.MaxYiFangLength)
    {
        stockchange.HolderFullName = String.Empty;
    }
    stockchange.HolderShortName = holder.ShortName;

    stockchange.ChangeEndDate = GetChangeEndDate(root);
    DateTime parsedDate;
    // An unparseable date is cleared, except in debug mode where the raw text is kept.
    if (!DateTime.TryParse(stockchange.ChangeEndDate, out parsedDate) && !Program.IsDebugMode)
    {
        stockchange.ChangeEndDate = String.Empty;
    }

    // Both the full name and the end date are required.
    if (string.IsNullOrEmpty(stockchange.HolderFullName) || string.IsNullOrEmpty(stockchange.ChangeEndDate))
    {
        return records;
    }
    // A "name" containing one of these two words is rejected.
    if (stockchange.HolderFullName.Contains("增持") || stockchange.HolderFullName.Contains("减持"))
    {
        return records;
    }

    records.Add(stockchange);
    return records;
}
/// <summary>
/// Serialises a stock-change record as one comma-separated line.
/// </summary>
/// <param name="increaseStock">The record to serialise.</param>
/// <returns>
/// The eight fields in the order id, holder full name, holder short name, change end date,
/// change price, change number, holding number after change, holding percent after change.
/// Every field, including the last, is followed by a comma; the four numeric fields are passed
/// through <c>Normalizer.NormalizeNumberResult</c> first.
/// </returns>
internal static string ConvertToString(struStockChange increaseStock)
{
    var line = new System.Text.StringBuilder();

    // Text fields are written verbatim.
    line.Append(increaseStock.id).Append(",");
    line.Append(increaseStock.HolderFullName).Append(",");
    line.Append(increaseStock.HolderShortName).Append(",");
    line.Append(increaseStock.ChangeEndDate).Append(",");

    // Numeric fields are normalised before being written.
    line.Append(Normalizer.NormalizeNumberResult(increaseStock.ChangePrice)).Append(",");
    line.Append(Normalizer.NormalizeNumberResult(increaseStock.ChangeNumber)).Append(",");
    line.Append(Normalizer.NormalizeNumberResult(increaseStock.HoldNumberAfterChange)).Append(",");
    line.Append(Normalizer.NormalizeNumberResult(increaseStock.HoldPercentAfterChange)).Append(",");

    return line.ToString();
}
/// <summary>
/// Extracts stock-change records from the tables of a parsed announcement and merges duplicates.
/// </summary>
/// <param name="root">Parsed HTML document to search.</param>
/// <param name="id">Announcement id stamped on every record.</param>
/// <returns>
/// The records that have both a holder full name and a change end date. When several records share
/// the same <c>GetKey()</c>, the ones without a holding number after the change are dropped if a
/// sibling with that number exists. Records whose id is empty are dropped as well.
/// </returns>
static List<struStockChange> ExtractFromTable(HTMLEngine.MyRootHtmlNode root, string id)
{
    // Column-matching rules, in the order their cells appear in each result row.
    var StockHolderRule = new TableSearchRule();
    StockHolderRule.Name = "股东全称";
    StockHolderRule.Rule = new string[] { "股东名称" }.ToList();
    StockHolderRule.IsEq = true;

    var ChangeDateRule = new TableSearchRule();
    ChangeDateRule.Name = "变动截止日期";
    ChangeDateRule.Rule = new string[] { "减持期间", "增持期间", "减持时间", "增持时间" }.ToList();
    ChangeDateRule.IsEq = false;
    ChangeDateRule.Normalize = Normalizer.NormailizeDate;

    var ChangePriceRule = new TableSearchRule();
    ChangePriceRule.Name = "变动价格";
    ChangePriceRule.Rule = new string[] { "减持均价", "增持均价" }.ToList();
    ChangePriceRule.IsEq = false;
    // Keep only the text in front of the "元" unit when it is present.
    ChangePriceRule.Normalize = (x, y) =>
    {
        if (x.Contains("元"))
        {
            return Utility.GetStringBefore(x, "元");
        }
        return x;
    };

    var ChangeNumberRule = new TableSearchRule();
    ChangeNumberRule.Name = "变动数量";
    ChangeNumberRule.Rule = new string[] { "减持股数", "增持股数" }.ToList();
    ChangeNumberRule.IsEq = false;
    ChangeNumberRule.Normalize = Normalizer.NormalizerStockNumber;

    var Rules = new List<TableSearchRule>();
    Rules.Add(StockHolderRule);
    Rules.Add(ChangeDateRule);
    Rules.Add(ChangePriceRule);
    Rules.Add(ChangeNumberRule);

    var result = HTMLTable.GetMultiInfo(root, Rules, false);

    // The after-change holding is meant to be written on the last record only, so walk the rows
    // backwards; records that do not exist must be filtered out promptly.
    result.Reverse();

    var stockchangelist = new List<struStockChange>();
    foreach (var rec in result)
    {
        var stockchange = new struStockChange();
        stockchange.id = id;
        var Name = NormalizeCompanyName(rec[0].RawData);
        stockchange.HolderFullName = Name.Item1;
        stockchange.HolderShortName = Name.Item2;
        stockchange.ChangeEndDate = rec[1].RawData;
        stockchange.ChangePrice = rec[2].RawData;
        stockchange.ChangeNumber = rec[3].RawData;

        // NOTE(review): the list is fetched again for every record, so whether the Used mark set
        // below is still visible for later records depends on GetHolderAfter returning the same
        // instances each time - confirm against its implementation.
        var holderafterlist = GetHolderAfter(root);
        for (int i = 0; i < holderafterlist.Count; i++)
        {
            var after = holderafterlist[i];
            if (after.Used)
            {
                continue;
            }
            if (after.Name == stockchange.HolderFullName || after.Name == stockchange.HolderShortName)
            {
                stockchange.HoldNumberAfterChange = after.Count;
                stockchange.HoldPercentAfterChange = after.Percent;
                after.Used = true;
                break;
            }
        }

        // Practically every valid record has a holder name and an end date. Dropping the rest may
        // hurt the rare record without an end date but is better for the overall metric.
        if (string.IsNullOrEmpty(stockchange.HolderFullName) || string.IsNullOrEmpty(stockchange.ChangeEndDate))
        {
            continue;
        }
        stockchangelist.Add(stockchange);
    }

    // Merge records that share a key.
    // The previous code blanked the id on the locals read from the list; for a value type those
    // locals are copies, so the list never changed and nothing was filtered. Dropped positions
    // are tracked by index instead, which works whether the element is a struct or a class.
    var dropped = new bool[stockchangelist.Count];
    for (int i = 0; i < stockchangelist.Count; i++)
    {
        var x = stockchangelist[i];
        for (int j = i + 1; j < stockchangelist.Count; j++)
        {
            var y = stockchangelist[j];
            if (x.GetKey() == y.GetKey())
            {
                bool xHasHolding = !string.IsNullOrEmpty(x.HoldNumberAfterChange);
                bool yHasHolding = !string.IsNullOrEmpty(y.HoldNumberAfterChange);
                if (!xHasHolding && yHasHolding)
                {
                    dropped[i] = true;
                }
                if (xHasHolding && !yHasHolding)
                {
                    dropped[j] = true;
                }
            }
        }
    }

    // Keep the original rule that a record with an empty id is never returned.
    return stockchangelist
           .Where((record, index) => !dropped[index] && !String.IsNullOrEmpty(record.id))
           .ToList();
}
/// <summary>
/// Extracts stock-change records from the tables of the announcement.
/// </summary>
/// <param name="root">Parsed HTML document whose tables are searched.</param>
/// <param name="id">Announcement id stamped on every record.</param>
/// <returns>
/// One record per usable table row. For each distinct holder full name, the record that sorts last
/// by change end date additionally receives the holding number and percent from every matching
/// entry returned by <c>GetHolderAfter()</c>. An empty list is returned when no table row matches
/// or when the holder name needed by the fallback search is missing.
/// </returns>
List<struStockChange> ExtractFromTable(HTMLEngine.MyRootHtmlNode root, string id)
{
    // Column rules; the cells of every result row follow the order in which rules are registered.
    var StockHolderRule = new TableSearchRule()
    {
        Name = "股东全称",
        Title = new List<string> { "股东名称", "名称", "增持主体", "增持人", "减持主体", "减持人" },
        IsTitleEq = true,
        IsRequire = true
    };
    var ChangeDateRule = new TableSearchRule()
    {
        Name = "变动截止日期",
        Title = new List<string> { "买卖时间", "日期", "减持期间", "增持期间", "减持股份期间", "增持股份期间", "减持时间", "增持时间", "减持股份时间", "增持股份时间" },
        IsTitleEq = false,
        Normalize = NormailizeEndChangeDate
    };
    var ChangePriceRule = new TableSearchRule()
    {
        Name = "变动价格",
        Title = new List<string> { "成交均价", "减持价格", "增持价格", "减持均", "增持均" },
        IsTitleEq = false,
        // Keep only the text in front of the "元" unit when it is present.
        Normalize = (x, y) => x.Contains("元") ? Utility.GetStringBefore(x, "元") : x
    };
    var ChangeNumberRule = new TableSearchRule()
    {
        Name = "变动数量",
        Title = new List<string> { "成交数量", "减持股数", "增持股数", "减持数量", "增持数量" },
        IsTitleEq = false,
        Normalize = NumberUtility.NormalizerStockNumber
    };

    var Rules = new List<TableSearchRule> { StockHolderRule, ChangeDateRule, ChangePriceRule, ChangeNumberRule };
    var result = HTMLTable.GetMultiInfo(root, Rules, false);

    if (result.Count == 0)
    {
        // Nothing was extracted: search again without the holder column, this time requiring the date column.
        Rules.Clear();
        ChangeDateRule.IsRequire = true;
        Rules.Add(ChangeDateRule);
        Rules.Add(ChangePriceRule);
        Rules.Add(ChangeNumberRule);
        result = HTMLTable.GetMultiInfo(root, Rules, false);
        if (result.Count == 0)
        {
            return new List<struStockChange>();
        }

        var widenedRows = new List<CellInfo[]>();
        var documentHolder = GetHolderName(this.root);
        if (String.IsNullOrEmpty(documentHolder.FullName) && String.IsNullOrEmpty(documentHolder.ShortName))
        {
            return new List<struStockChange>();
        }
        foreach (var row in result)
        {
            // Put a synthetic holder cell in front so the row has the same four-cell layout as a
            // first-pass row; the full name is preferred and the short name is the fallback.
            var holderCell = new CellInfo()
            {
                RawData = String.IsNullOrEmpty(documentHolder.FullName) ? documentHolder.ShortName : documentHolder.FullName
            };
            widenedRows.Add(new CellInfo[] { holderCell, row[0], row[1], row[2] });
        }
        result = widenedRows;
    }

    var holderafterlist = GetHolderAfter();
    var stockchangelist = new List<struStockChange>();
    foreach (var rec in result)
    {
        var stockchange = new struStockChange();
        stockchange.id = id;

        // Long names in the table may have been cut by a page break;
        // the holder-after list is used to check and repair them.
        var candidateName = rec[0].RawData;
        var summaryNames = holderafterlist.Select((z) => { return z.Name; }).ToList();
        if (!summaryNames.Contains(candidateName))
        {
            foreach (var entry in holderafterlist)
            {
                if (entry.Name.EndsWith("先生"))
                {
                    // Special case with no real logic behind it (per the original author).
                    break;
                }
                // An entry that starts or ends with the cell text, without being identical to it,
                // replaces the cell text.
                if ((entry.Name.StartsWith(candidateName) || entry.Name.EndsWith(candidateName)) && !entry.Name.Equals(candidateName))
                {
                    candidateName = entry.Name;
                    break;
                }
            }
        }

        var normalizedName = CompanyNameLogic.NormalizeCompanyName(this, candidateName);
        stockchange.HolderFullName = normalizedName.FullName.NormalizeTextResult();
        stockchange.HolderShortName = normalizedName.ShortName;
        if (stockchange.HolderFullName.Contains("简称"))
        {
            // The full name embeds the short name after "简称": split the two apart.
            var embeddedShortName = Utility.GetStringAfter(stockchange.HolderFullName, "简称");
            stockchange.HolderShortName = embeddedShortName
                                          .Replace(")", String.Empty)
                                          .Replace("“", String.Empty)
                                          .Replace("”", String.Empty);
            stockchange.HolderFullName = Utility.GetStringBefore(stockchange.HolderFullName, "(");
        }

        stockchange.ChangeEndDate = rec[1].RawData;
        DateTime parsedDate;
        // An unparseable date is cleared, except in debug mode where the raw text is kept.
        if (!DateTime.TryParse(stockchange.ChangeEndDate, out parsedDate) && !Program.IsDebugMode)
        {
            stockchange.ChangeEndDate = String.Empty;
        }

        // A price given as a range ("-", "~" or "至") is ignored.
        var priceText = rec[2].RawData;
        if (!String.IsNullOrEmpty(priceText)
            && !priceText.Contains("-")
            && !priceText.Contains("~")
            && !priceText.Contains("至"))
        {
            stockchange.ChangePrice = priceText.Replace(" ", String.Empty).NormalizeNumberResult();
        }
        if (!RegularTool.IsUnsign(stockchange.ChangePrice))
        {
            if (!String.IsNullOrEmpty(stockchange.ChangePrice))
            {
                Console.WriteLine("Error ChangePrice:[" + stockchange.ChangePrice + "]");
            }
            stockchange.ChangePrice = String.Empty;
        }

        var numberText = rec[3].RawData;
        if (!String.IsNullOrEmpty(numberText))
        {
            stockchange.ChangeNumber = numberText.Replace(" ", String.Empty).NormalizeNumberResult();
            if (!RegularTool.IsUnsign(stockchange.ChangeNumber))
            {
                if (!String.IsNullOrEmpty(stockchange.ChangeNumber))
                {
                    Console.WriteLine("Error ChangeNumber:[" + stockchange.ChangeNumber + "]");
                }
                stockchange.ChangeNumber = String.Empty;
            }
        }

        // Practically every valid record has a holder name and an end date. Dropping the rest may
        // hurt the rare record without an end date but is better for the overall metric.
        bool missingCoreField = string.IsNullOrEmpty(stockchange.HolderFullName) || string.IsNullOrEmpty(stockchange.ChangeEndDate);
        if (missingCoreField)
        {
            continue;
        }
        if (stockchange.ChangeNumber == "0" || stockchange.ChangePrice == "0")
        {
            continue;
        }
        stockchangelist.Add(stockchange);
    }

    // Collect every distinct holder full name.
    var namelist = stockchangelist.Select(r => r.HolderFullName).Distinct().ToList();
    var newRec = new List<struStockChange>();
    foreach (var name in namelist)
    {
        var recordsOfHolder = stockchangelist.Where(r => r.HolderFullName == name).ToList();
        recordsOfHolder.Sort((a, b) => a.ChangeEndDate.CompareTo(b.ChangeEndDate));
        var last = recordsOfHolder.Last();

        foreach (var entry in holderafterlist)
        {
            var after = entry;
            after.Name = after.Name.Replace(" ", "");
            if (after.Name == last.HolderFullName || after.Name == last.HolderShortName)
            {
                // The record is a struct and cannot be modified in place inside the list:
                // remove it, update the copy and re-add it later.
                stockchangelist.Remove(last);
                last.HoldNumberAfterChange = after.Count;
                last.HoldPercentAfterChange = after.Percent;
                newRec.Add(last);
            }
        }
    }

    if (holderafterlist.Count != namelist.Count && !Program.IsMultiThreadMode)
    {
        Program.Logger.WriteLine("增持者数量确认!");
    }

    stockchangelist.AddRange(newRec);
    return stockchangelist;
}
/// <summary>
/// Extracts stock-change records from the tables of a parsed announcement.
/// </summary>
/// <param name="root">Parsed HTML document to search.</param>
/// <param name="id">Announcement id stamped on every record.</param>
/// <returns>
/// One record per row returned by <c>HTMLTable.GetMultiInfo</c>, in reverse row order, each
/// completed with the holding number and percent of the first unused, name-matching entry of
/// <c>GetHolderAfter(root)</c> when there is one.
/// </returns>
static List<struStockChange> ExtractFromTable(HTMLEngine.MyRootHtmlNode root, string id)
{
    // Column rules; the cells of every result row follow the order in which rules are registered.
    var holderRule = new TableSearchRule()
    {
        Name = "股东全称",
        Rule = new List<string> { "股东名称" },
        IsEq = true
    };
    var dateRule = new TableSearchRule()
    {
        Name = "变动截止日期",
        Rule = new List<string> { "减持期间", "增持期间", "减持时间", "增持时间" },
        IsEq = false,
        Normalize = Normalizer.NormailizeDate
    };
    var priceRule = new TableSearchRule()
    {
        Name = "变动价格",
        Rule = new List<string> { "减持均价", "增持均价" },
        IsEq = false,
        // Keep only the text in front of the "元" unit when it is present.
        Normalize = (x, y) => x.Contains("元") ? Utility.GetStringBefore(x, "元") : x
    };
    var numberRule = new TableSearchRule()
    {
        Name = "变动数量",
        Rule = new List<string> { "减持股数", "增持股数" },
        IsEq = false,
        Normalize = Normalizer.NormalizerStockNumber
    };

    var rules = new List<TableSearchRule> { holderRule, dateRule, priceRule, numberRule };
    var rows = HTMLTable.GetMultiInfo(root, rules, false);

    // The after-change holding is meant to be written on the last record only, so walk the rows
    // backwards; records that do not exist must be filtered out promptly.
    rows.Reverse();

    var records = new List<struStockChange>();
    foreach (var row in rows)
    {
        var holderName = NormalizeCompanyName(row[0].RawData);

        var record = new struStockChange();
        record.id = id;
        record.HolderFullName = holderName.Item1;
        record.HolderShortName = holderName.Item2;
        record.ChangeEndDate = row[1].RawData;
        record.ChangePrice = row[2].RawData;
        record.ChangeNumber = row[3].RawData;

        // Take the holding figures from the first unused entry whose name equals either form of
        // the holder name. The list is fetched again for every row, as in the original.
        foreach (var entry in GetHolderAfter(root))
        {
            var after = entry;
            if (after.Used)
            {
                continue;
            }
            bool sameHolder = after.Name == record.HolderFullName || after.Name == record.HolderShortName;
            if (!sameHolder)
            {
                continue;
            }
            record.HoldNumberAfterChange = after.Count;
            record.HoldPercentAfterChange = after.Percent;
            after.Used = true;
            break;
        }

        records.Add(record);
    }
    return records;
}