/// <summary>
/// Searches the document for a quoted short name that is introduced right after
/// an occurrence of <paramref name="FullName"/> by the marker word "简称".
/// </summary>
/// <param name="FullName">The full name to look up; names of four characters or fewer are not searched.</param>
/// <param name="doc">The document whose <c>root</c> is scanned.</param>
/// <returns>
/// The value of the first matching quotation that is shorter than <paramref name="FullName"/>,
/// or <see cref="string.Empty"/> when nothing matches.
/// </returns>
public static string GetShortNameByFullName(String FullName, AnnouceDocument doc)
{
    // A name this short is either not a name or already a short name: nothing to search for.
    if (FullName.Length <= 4)
    {
        return string.Empty;
    }

    var quotations = LocateProperty.LocateQuotation(doc.root, false);
    var fullNameHits = LocateProperty.LocateCustomerWord(doc.root, new[] { FullName }.ToList());
    var shortNameMarkers = LocateProperty.LocateCustomerWord(doc.root, new[] { "简称" }.ToList());

    foreach (var nameHit in fullNameHits)
    {
        foreach (var quote in quotations)
        {
            // Only quotation entries in the same Loc that start after the full name are candidates.
            bool isCandidate = quote.Loc == nameHit.Loc
                               && quote.Description == "引号"
                               && quote.StartIdx > nameHit.StartIdx;
            if (!isCandidate)
            {
                continue;
            }

            // A "简称" marker must sit between the full name and the quotation,
            // at most 4 index positions before the quotation starts.
            bool isIntroducedByMarker = shortNameMarkers.Any(marker =>
                marker.Loc == nameHit.Loc
                && marker.StartIdx > nameHit.StartIdx
                && marker.StartIdx < quote.StartIdx
                && (quote.StartIdx - marker.StartIdx) <= 4);

            if (isIntroducedByMarker && quote.Value.Length < FullName.Length)
            {
                return quote.Value;
            }
        }
    }

    return string.Empty;
}
/// <summary>
/// Trade counterparties: collects candidate names from the tables matched by the
/// <c>TradeCompany</c> title rule and keeps those that appear in the text in the order
/// "counterparty ... holds ... target" within a single paragraph entry.
/// </summary>
/// <param name="target">Record whose target-company full and short names are located in the text.</param>
/// <returns>The distinct counterparty names that matched; empty when no table row matched the rule.</returns>
public List <string> getTradeCompany(ReorganizationRec target)
{
    var tradeCompanies = new List <string>();

    TradeCompany.IsRequire = true;
    var titleRules = new List <TableSearchTitleRule> { TradeCompany };
    var searchOption = new HTMLTable.SearchOption { IsMeger = true };
    var matchedRows = HTMLTable.GetMultiInfoByTitleRules(root, titleRules, searchOption);
    if (matchedRows.Count == 0)
    {
        return tradeCompanies;
    }

    // Trader list extracted from the table(s).
    // The counterparty is also an item of the explanation table, where it is wrongly
    // recognised as a table header, so rows from explanation tables are excluded.
    // TODO: the counterparty should only be taken from tables at the front of the document!
    var tableTrades = (from row in matchedRows
                       where !ExplainTableId.Contains(row[0].TableId)
                       let rawText = row[0].RawData
                       where !rawText.Contains("不超过")
                       select rawText).ToList();

    var targetNames = new[] { target.TargetCompanyFullName, target.TargetCompanyShortName }.ToList();
    var holdWords = new[] { "持有", "所持" }.ToList();

    var targetLoc = LocateProperty.LocateCustomerWord(root, targetNames, "标的");
    var holderLoc = LocateProperty.LocateCustomerWord(root, holdWords, "持有");
    var ownerLoc = LocateProperty.LocateCustomerWord(root, tableTrades, "交易对手");

    CustomerList.AddRange(targetLoc);
    CustomerList.AddRange(holderLoc);
    CustomerList.AddRange(ownerLoc);
    nermap.Anlayze(this);

    foreach (var paragraph in nermap.ParagraghlocateDict.Values)
    {
        // Scan the located words in position order, looking for the sequence
        // counterparty -> "holds" -> target.
        paragraph.CustomerList.Sort((left, right) => left.StartIdx.CompareTo(right.StartIdx));

        int ownerIdx = -1;
        int holderIdx = -1;
        int targetIdx = -1;
        var ownerName = string.Empty;

        foreach (var ner in paragraph.CustomerList)
        {
            var kind = ner.Description;
            if (kind == "交易对手")
            {
                ownerIdx = ner.StartIdx;
                ownerName = ner.Value;
            }
            else if (kind == "持有")
            {
                // A "holds" word only counts once a counterparty has been seen.
                if (ownerIdx != -1)
                {
                    holderIdx = ner.StartIdx;
                }
            }
            else if (kind == "标的")
            {
                // A target only counts after both a counterparty and a "holds" word.
                if (ownerIdx != -1 && holderIdx != -1)
                {
                    targetIdx = ner.StartIdx;
                }
            }

            bool isSequenceComplete = ownerIdx != -1 && holderIdx != -1 && targetIdx != -1;
            if (!isSequenceComplete)
            {
                continue;
            }

            // Accept the counterparty only when the target starts fewer than 20 positions after it.
            if (targetIdx - ownerIdx < 20)
            {
                tradeCompanies.Add(ownerName);
            }

            // Start looking for the next sequence.
            ownerIdx = -1;
            holderIdx = -1;
            targetIdx = -1;
        }
    }

    return tradeCompanies.Distinct().ToList();
}
/// <summary>
/// Builds the reorganization records of this document: one record per target returned by
/// <c>getTargetListFromExplainTable</c>, with target company names, trade counterparty,
/// price and evaluation method filled in.
/// </summary>
/// <remarks>
/// The trade counterparty is chosen in this order: the table-based result of
/// <c>getTradeCompany</c>, overridden by the keyword-based result of
/// <c>getTradeCompanyByKeyWord</c> when that returns anything usable, and finally the
/// explanation-table result of <c>getTradeCompanyByExplain</c> when the field is still empty.
/// Records with the same <c>TargetCompany + Target</c> key are emitted only once.
/// </remarks>
/// <returns>The extracted records; empty when no target is found.</returns>
public override List <RecordBase> Extract()
{
    InitTableRules();
    GetPersonList();

    // Diagnostic output: does the explanation table contain counterparty entries?
    foreach (var entry in ExplainDict)
    {
        foreach (var key in SplitExplainKey(entry.Key))
        {
            if (key.Contains("交易对方"))
            {
                Console.WriteLine("交易对方条目:" + key);
            }
        }
    }

    var list = new List <RecordBase>();
    var targets = getTargetListFromExplainTable().Distinct().ToList();
    if (targets.Count == 0)
    {
        return list;
    }

    var evaluateMethodLoc = LocateProperty.LocateCustomerWord(root, ReOrganizationTraning.EvaluateMethodList, "评估法");
    this.CustomerList = evaluateMethodLoc;
    nermap.Anlayze(this);

    // Loop-invariant helpers, created once instead of once per target.
    var ordinalMarkPattern = new Regex(@"\<(\d+)\>");
    var skippedTargetWords = new[] { "发行", "置换", "置出", "置入", "本次", "出售", "购买" };
    var seenKeys = new HashSet <string>();
    bool isSingleTarget = targets.Count == 1;
    const string totalSuffix = "合计";

    foreach (var item in targets)
    {
        // Targets that describe the transaction itself rather than an asset are ignored.
        if (skippedTargetWords.Any(word => item.Target.Contains(word)))
        {
            continue;
        }

        var reorgRec = new ReorganizationRec();
        reorgRec.Id = this.Id;
        reorgRec.Target = item.Target;

        // Remove a trailing "合计" as a whole suffix. TrimEnd with a character set would
        // also eat a lone trailing '合' or '计' and truncate names such as "XX设计".
        var company = item.Comany;
        while (company.EndsWith(totalSuffix, StringComparison.Ordinal))
        {
            company = company.Substring(0, company.Length - totalSuffix.Length);
        }
        reorgRec.TargetCompany = company;

        // Strip ordinal markers such as "<1>" from the company name.
        if (ordinalMarkPattern.IsMatch(reorgRec.TargetCompany))
        {
            Console.WriteLine("Before Trim:" + reorgRec.TargetCompany);
            reorgRec.TargetCompany = ordinalMarkPattern.Replace(reorgRec.TargetCompany, "");
            Console.WriteLine("After Trim:" + reorgRec.TargetCompany);
        }

        if (reorgRec.TargetCompany.Equals("本公司") || reorgRec.TargetCompany.Equals("标的公司"))
        {
            continue;
        }

        string fullName;
        string shortName;

        // Fill in the target company's full and short name from the explanation table.
        if (TryResolveExplainNames(reorgRec.TargetCompany, out fullName, out shortName))
        {
            reorgRec.TargetCompanyFullName = fullName;
            reorgRec.TargetCompanyShortName = shortName;
        }

        // Counterparties found through the table rule.
        var tableTrades = getTradeCompany(reorgRec);
        reorgRec.TradeCompany = String.Join(Utility.SplitChar, tableTrades);

        // Counterparties found through the keyword/template rules.
        var xTradeList = getTradeCompanyByKeyWord(reorgRec);
        if (xTradeList.Count == 1)
        {
            // Single result: try to resolve its full and short name.
            string xTrade = xTradeList.First();
            if (!String.IsNullOrEmpty(xTrade))
            {
                if (TryResolveExplainNames(xTrade, out fullName, out shortName))
                {
                    reorgRec.TradeCompanyFullName = fullName;
                    reorgRec.TradeCompanyShortName = shortName;
                }

                reorgRec.TradeCompany = xTrade;
                if (!String.IsNullOrEmpty(reorgRec.TradeCompanyFullName) &&
                    !String.IsNullOrEmpty(reorgRec.TradeCompanyShortName))
                {
                    reorgRec.TradeCompany = reorgRec.TradeCompanyFullName + "|" + reorgRec.TradeCompanyShortName;
                }
            }
        }
        else if (xTradeList.Count > 1)
        {
            reorgRec.TradeCompany = String.Join(Utility.SplitChar, xTradeList);
        }

        // Fall back to the counterparty found in the explanation table, with the necessary corrections.
        if (string.IsNullOrEmpty(reorgRec.TradeCompany))
        {
            var xTradeListExplain = getTradeCompanyByExplain(reorgRec);
            foreach (var tradeItem in xTradeListExplain)
            {
                if (TryResolveExplainNames(tradeItem.Value, out fullName, out shortName))
                {
                    reorgRec.TradeCompanyFullName = fullName;
                    // Generic role labels are not usable as a short name.
                    if (!shortName.Equals("交易对方") && !shortName.Equals("发行对象"))
                    {
                        reorgRec.TradeCompanyShortName = shortName;
                    }
                }

                reorgRec.TradeCompany = tradeItem.Value;
                if (!String.IsNullOrEmpty(reorgRec.TradeCompanyFullName) &&
                    !String.IsNullOrEmpty(reorgRec.TradeCompanyShortName))
                {
                    reorgRec.TradeCompany = reorgRec.TradeCompanyFullName + "|" + reorgRec.TradeCompanyShortName;
                }
                else
                {
                    // Split a trailing "A及B"-style entry (e.g. 中建六局及中建八局) into separate names.
                    var tradeArray = tradeItem.Value.Split(Utility.SplitChar).ToList();
                    var last = tradeArray.Last();
                    if (last.Contains("以及"))
                    {
                        SplitLastTradeEntry(tradeArray, last, "以及");
                        reorgRec.TradeCompany = String.Join(Utility.SplitChar, tradeArray);
                    }
                    else
                    {
                        // NOTE(review): when `last` contains both "及" and "和", the second split
                        // still works on the original text and replaces the tail produced by the
                        // first split. Kept as-is; confirm whether that case needs handling.
                        if (last.Contains("及"))
                        {
                            SplitLastTradeEntry(tradeArray, last, "及");
                            reorgRec.TradeCompany = String.Join(Utility.SplitChar, tradeArray);
                        }
                        if (last.Contains("和"))
                        {
                            SplitLastTradeEntry(tradeArray, last, "和");
                            reorgRec.TradeCompany = String.Join(Utility.SplitChar, tradeArray);
                        }
                    }
                }

                Console.WriteLine("使用释义表的交易对手:" + tradeItem.Key + ":" + reorgRec.TradeCompany);
                // Only the first explanation-table entry is used.
                break;
            }
        }

        // Final normalisation of the counterparty field.
        NormalizeTradeCompany(reorgRec);

        var price = GetPrice(reorgRec, isSingleTarget);
        reorgRec.Price = MoneyUtility.Format(price.MoneyAmount, String.Empty);
        reorgRec.EvaluateMethod = getEvaluateMethod(reorgRec, isSingleTarget);

        if (!String.IsNullOrEmpty(reorgRec.TargetCompanyFullName) &&
            !String.IsNullOrEmpty(reorgRec.TargetCompanyShortName))
        {
            reorgRec.TargetCompany = reorgRec.TargetCompanyFullName + "|" + reorgRec.TargetCompanyShortName;
        }

        if (String.IsNullOrEmpty(reorgRec.TargetCompany) || String.IsNullOrEmpty(reorgRec.Target))
        {
            continue;
        }

        // Merge identical records: keep only the first one for each TargetCompany + Target key.
        if (seenKeys.Add(reorgRec.TargetCompany + reorgRec.Target))
        {
            list.Add(reorgRec);
        }
    }

    // Verification output only: report when the price/evaluation tables disagree
    // with the number of extracted records.
    if (PriceTable.Count != 0 && EvaluateTable.Count != 0 && PriceTable.Count == EvaluateTable.Count)
    {
        if (PriceTable.Count != list.Count)
        {
            Console.WriteLine(Id);
            foreach (var item in EvaluateTable)
            {
                Console.WriteLine("评估表:" + item[0].RawData.Replace(" ", "") + " Value:" + item[1].RawData);
            }
            foreach (var item in PriceTable)
            {
                Console.WriteLine("价格表:" + item[0].RawData.Replace(" ", "") + " Value:" + item[1].RawData);
            }
            foreach (ReorganizationRec item in list)
            {
                Console.WriteLine("抽出:" + item.TargetCompany + item.Target);
            }
        }
    }

    return list;
}

/// <summary>
/// Splits an explanation-table key into its alternative names: by <c>Utility.SplitChar</c>
/// first, and by '/' when that yields a single piece but the slash split yields several.
/// </summary>
private static string[] SplitExplainKey(string rawKey)
{
    var keys = rawKey.Split(Utility.SplitChar);
    // NOTE(review): both separators below are the same character as written; confirm
    // whether one of them was meant to be a different (e.g. full-width) slash.
    var slashKeys = rawKey.Split(new char[] { '/', '/' });
    if (keys.Length == 1 && slashKeys.Length > 1)
    {
        return slashKeys;
    }
    return keys;
}

/// <summary>
/// Finds the first explanation-table entry whose key alias or value equals <paramref name="name"/>
/// and returns its value as the full name and the matching alias as the short name, each cut at
/// the first "," when present. Aliases that are generic role words are never matched.
/// </summary>
/// <returns><c>true</c> when an entry matched; otherwise <c>false</c> with both outputs <c>null</c>.</returns>
private bool TryResolveExplainNames(string name, out string fullName, out string shortName)
{
    fullName = null;
    shortName = null;

    foreach (var entry in ExplainDict)
    {
        foreach (var key in SplitExplainKey(entry.Key))
        {
            // Role labels (target / listed company / "this company") are not company names.
            if (key.Contains("标的") || key.Contains("目标") || key.Equals("上市公司") || key.Equals("本公司"))
            {
                continue;
            }

            if (key.Equals(name) || entry.Value.Equals(name))
            {
                var tempKey = key;
                if (tempKey.Contains(","))
                {
                    tempKey = Utility.GetStringBefore(tempKey, ",");
                }
                var tempValue = entry.Value;
                if (tempValue.Contains(","))
                {
                    tempValue = Utility.GetStringBefore(tempValue, ",");
                }

                fullName = tempValue;
                shortName = tempKey;
                return true;
            }
        }
    }

    return false;
}

/// <summary>
/// Replaces the last element of <paramref name="entries"/> with the parts of
/// <paramref name="last"/> before and after <paramref name="conjunction"/>.
/// </summary>
private static void SplitLastTradeEntry(List <string> entries, string last, string conjunction)
{
    entries.RemoveAt(entries.Count - 1);
    entries.Add(Utility.GetStringBefore(last, conjunction));
    entries.Add(Utility.GetStringAfter(last, conjunction));
}