Пример #1
0
    /// <summary>
    /// Looks for the short name (abbreviation) that the document introduces for
    /// <paramref name="FullName"/>.
    /// </summary>
    /// <remarks>
    /// A candidate is a located quotation (description "引号") that shares its location with an
    /// occurrence of the full name, starts after that occurrence, and is preceded - within 4 index
    /// positions - by the marker word "简称" which itself starts after the full name. The first
    /// candidate whose text is shorter than the full name is returned.
    /// </remarks>
    /// <param name="FullName">
    /// Full name to look up. Null, empty, or a name of 4 characters or fewer yields an empty result.
    /// </param>
    /// <param name="doc">Announcement document whose <c>root</c> is searched.</param>
    /// <returns>The short name, or <see cref="string.Empty"/> when none is found.</returns>
    public static string GetShortNameByFullName(String FullName, AnnouceDocument doc)
    {
        // A missing name, or one this short, is treated as "already an abbreviation": nothing to look up.
        if (string.IsNullOrEmpty(FullName) || FullName.Length <= 4)
        {
            return string.Empty;
        }

        var quotations   = LocateProperty.LocateQuotation(doc.root, false);
        var fullNameHits = LocateProperty.LocateCustomerWord(doc.root, new string[] { FullName }.ToList());
        var markerHits   = LocateProperty.LocateCustomerWord(doc.root, new string[] { "简称" }.ToList());

        foreach (var nameHit in fullNameHits)
        {
            // Markers in the same location that start after this occurrence of the full name.
            // This does not depend on the quotation, so it is computed once per occurrence.
            var markersAfterName = markerHits
                                   .Where(m => m.Loc == nameHit.Loc && m.StartIdx > nameHit.StartIdx)
                                   .ToList();
            if (markersAfterName.Count == 0)
            {
                continue;
            }

            foreach (var quote in quotations)
            {
                var isCandidate = quote.Loc == nameHit.Loc &&
                                  quote.Description == "引号" &&
                                  quote.StartIdx > nameHit.StartIdx;
                if (!isCandidate)
                {
                    continue;
                }

                // The marker must sit immediately (at most 4 positions) in front of the quotation.
                var isIntroducedByMarker = markersAfterName.Any(
                    m => m.StartIdx < quote.StartIdx && (quote.StartIdx - m.StartIdx) <= 4);

                // An abbreviation has to be shorter than the name it abbreviates.
                if (isIntroducedByMarker && quote.Value.Length < FullName.Length)
                {
                    return quote.Value;
                }
            }
        }
        return string.Empty;
    }
Пример #2
0
    /// <summary>
    /// Analyzes one announcement HTML file and returns the contracts extracted from it.
    /// </summary>
    /// <remarks>
    /// Announcements come in three shapes: (1) a single main contract, (2) several sub-contracts
    /// only, (3) one main contract plus several sub-contracts. The code below extracts a single
    /// (main) contract via <c>ExtractSingle</c>. As a side effect it refreshes the
    /// <c>companynamelist</c>, <c>datelist</c> and <c>moneylist</c> members.
    /// </remarks>
    /// <param name="htmlFileName">Path of the HTML file; its name without ".html" is used as the announcement id.</param>
    /// <returns>A list holding the single extracted contract.</returns>
    public static List <struContract> Extract(string htmlFileName)
    {
        var fileInfo = new System.IO.FileInfo(htmlFileName);
        Program.Logger.WriteLine(string.Concat("Start FileName:[", fileInfo.Name, "]"));

        var root = HTMLEngine.Anlayze(htmlFileName);

        // Entities located once per document and shared through these members.
        companynamelist = BussinessLogic.GetCompanyNameByCutWord(root);
        datelist        = LocateProperty.LocateDate(root);
        moneylist       = LocateProperty.LocateMoney(root);

        var announcementId = fileInfo.Name.Replace(".html", "");
        Program.Logger.WriteLine(string.Concat("公告ID:", announcementId));

        // Extraction of the main contract.
        return new List <struContract> { ExtractSingle(root, announcementId) };
    }
Пример #3
0
    /// <summary>
    /// Finds the trade counterparties for the given target record.
    /// </summary>
    /// <remarks>
    /// Candidate names come from table rows matched by the <c>TradeCompany</c> title rule (tables
    /// listed in <c>ExplainTableId</c> are excluded). A candidate is accepted when, inside one
    /// paragraph entry of <c>nermap</c>, the words appear in the order
    /// "counterparty ... holds ... target" and the target starts fewer than 20 index positions
    /// after the counterparty. The located words are appended to <c>CustomerList</c> and
    /// <c>nermap</c> is re-analyzed as a side effect.
    /// </remarks>
    /// <param name="target">Record whose target company full/short names are located as "标的" words.</param>
    /// <returns>Distinct counterparty names; empty when no table row matches the title rule.</returns>
    public List <string> getTradeCompany(ReorganizationRec target)
    {
        // Maximum index distance allowed between the counterparty name and the target name.
        const int MaxOwnerToTargetDistance = 20;

        var rtn = new List <string>();

        TradeCompany.IsRequire = true;
        var Rules = new List <TableSearchTitleRule> { TradeCompany };
        var opt   = new HTMLTable.SearchOption();
        opt.IsMeger = true;

        var result = HTMLTable.GetMultiInfoByTitleRules(root, Rules, opt);
        if (result.Count == 0)
        {
            return rtn;
        }

        // Candidate traders taken from the matched table rows.
        // The counterparty title is also an item of the explain (definitions) table, where it gets
        // mis-detected as a table header; rows from those tables are therefore dropped.
        // NOTE(review): an earlier comment said only the first table is used as the basis, but no
        // filtering by table id was ever applied (the id was read into an unused local) - confirm intent.
        // TODO: counterparties should only be taken from tables in the front part of the document.
        var TableTrades = result.Where(z => !ExplainTableId.Contains(z[0].TableId))
                          .Select(x => x[0].RawData)
                          .Where(y => !y.Contains("不超过"))
                          .ToList();

        var TargetLoc = LocateProperty.LocateCustomerWord(root, new string[] { target.TargetCompanyFullName, target.TargetCompanyShortName }.ToList(), "标的");
        var HolderLoc = LocateProperty.LocateCustomerWord(root, new string[] { "持有", "所持" }.ToList(), "持有");
        var OwnerLoc  = LocateProperty.LocateCustomerWord(root, TableTrades, "交易对手");

        // NOTE(review): CustomerList is appended to on every call and never cleared here, so
        // entries accumulate across calls - confirm this is intended.
        CustomerList.AddRange(TargetLoc);
        CustomerList.AddRange(HolderLoc);
        CustomerList.AddRange(OwnerLoc);
        nermap.Anlayze(this);

        foreach (var nerlist in nermap.ParagraghlocateDict.Values)
        {
            // Scan for the word sequence: counterparty -> "holds" -> target.
            int OwnerIdx  = -1;
            int HolderIdx = -1;
            int TargetIdx = -1;
            var OwnerName = string.Empty;

            nerlist.CustomerList.Sort((x, y) => x.StartIdx.CompareTo(y.StartIdx));
            foreach (var ner in nerlist.CustomerList)
            {
                if (ner.Description == "交易对手")
                {
                    // The most recent counterparty seen always becomes the current one.
                    OwnerIdx  = ner.StartIdx;
                    OwnerName = ner.Value;
                }
                if (ner.Description == "持有" && OwnerIdx != -1)
                {
                    HolderIdx = ner.StartIdx;
                }
                if (ner.Description == "标的" && OwnerIdx != -1 && HolderIdx != -1)
                {
                    TargetIdx = ner.StartIdx;
                }

                var sequenceComplete = OwnerIdx != -1 && HolderIdx != -1 && TargetIdx != -1;
                if (!sequenceComplete)
                {
                    continue;
                }

                if (TargetIdx - OwnerIdx < MaxOwnerToTargetDistance)
                {
                    rtn.Add(OwnerName);
                }
                // Start looking for the next sequence in the same paragraph.
                OwnerIdx  = -1;
                HolderIdx = -1;
                TargetIdx = -1;
            }
        }
        return rtn.Distinct().ToList();
    }
Пример #4
0
    /// <summary>
    /// Builds one reorganization record per target listed in the explain (definitions) table.
    /// </summary>
    /// <remarks>
    /// For each target the method resolves the target company's full and short name from
    /// <c>ExplainDict</c>, determines the trade counterparty (result of <c>getTradeCompany</c>,
    /// overridden by the keyword rules when they return anything, with the explain table as the
    /// fallback when still empty), and fills in price and evaluation method. Records sharing the
    /// same target company + target are emitted only once. Diagnostic lines are written to the console.
    /// </remarks>
    /// <returns>The extracted records; empty when the explain table yields no targets.</returns>
    public override List <RecordBase> Extract()
    {
        InitTableRules();
        GetPersonList();
        LogTradeCounterpartyExplainKeys();

        var list    = new List <RecordBase>();
        var targets = getTargetListFromExplainTable().Distinct().ToList();

        if (targets.Count == 0)
        {
            return list;
        }

        var EvaluateMethodLoc = LocateProperty.LocateCustomerWord(root, ReOrganizationTraning.EvaluateMethodList, "评估法");
        this.CustomerList = EvaluateMethodLoc;
        nermap.Anlayze(this);

        var isSingleTarget = targets.Count == 1;
        // Keys (target company + target) of the records already added to `list`.
        var seenUnionKeys = new HashSet <string>();

        foreach (var item in targets)
        {
            // Targets that describe the transaction itself are not concrete assets; skip them.
            if (IgnoredTargetKeywords.Any(word => item.Target.Contains(word)))
            {
                continue;
            }

            var reorgRec = new ReorganizationRec();
            reorgRec.Id            = this.Id;
            reorgRec.Target        = item.Target;
            reorgRec.TargetCompany = StripTotalSuffix(item.Comany);

            // Company names may carry a numeric marker in angle brackets; drop it.
            if (AngleNumberPattern.IsMatch(reorgRec.TargetCompany))
            {
                Console.WriteLine("Before Trim:" + reorgRec.TargetCompany);
                reorgRec.TargetCompany = AngleNumberPattern.Replace(reorgRec.TargetCompany, "");
                Console.WriteLine("After  Trim:" + reorgRec.TargetCompany);
            }
            if (reorgRec.TargetCompany.Equals("本公司") || reorgRec.TargetCompany.Equals("标的公司"))
            {
                continue;
            }

            // Fill in the target company's full and short name from the explain table.
            string targetFullName;
            string targetShortName;
            if (TryResolveExplainNames(reorgRec.TargetCompany, out targetFullName, out targetShortName))
            {
                reorgRec.TargetCompanyFullName  = targetFullName;
                reorgRec.TargetCompanyShortName = targetShortName;
            }

            var tradeCompanies = getTradeCompany(reorgRec);
            reorgRec.TradeCompany = String.Join(Utility.SplitChar, tradeCompanies);

            // Counterparties obtained through the template/keyword rules take precedence.
            ApplyKeywordTradeCompany(reorgRec);

            // Still nothing: fall back to the counterparties named in the explain table.
            if (string.IsNullOrEmpty(reorgRec.TradeCompany))
            {
                ApplyExplainTradeCompany(reorgRec);
            }

            // Final normalization of the counterparty text.
            NormalizeTradeCompany(reorgRec);

            var Price = GetPrice(reorgRec, isSingleTarget);
            reorgRec.Price          = MoneyUtility.Format(Price.MoneyAmount, String.Empty);
            reorgRec.EvaluateMethod = getEvaluateMethod(reorgRec, isSingleTarget);

            if (!String.IsNullOrEmpty(reorgRec.TargetCompanyFullName) &&
                !String.IsNullOrEmpty(reorgRec.TargetCompanyShortName))
            {
                reorgRec.TargetCompany = reorgRec.TargetCompanyFullName + "|" + reorgRec.TargetCompanyShortName;
            }

            if (String.IsNullOrEmpty(reorgRec.TargetCompany) || String.IsNullOrEmpty(reorgRec.Target))
            {
                continue;
            }

            // Merge identical records: only the first record for a given key is kept.
            var unionKey = reorgRec.TargetCompany + reorgRec.Target;
            if (seenUnionKeys.Add(unionKey))
            {
                list.Add(reorgRec);
            }
        }

        // Verification only: when the price and evaluation tables agree with each other on the
        // number of rows but disagree with the number of extracted records, dump all three.
        if (PriceTable.Count != 0 && EvaluateTable.Count != 0 &&
            PriceTable.Count == EvaluateTable.Count && PriceTable.Count != list.Count)
        {
            Console.WriteLine(Id);
            foreach (var item in EvaluateTable)
            {
                Console.WriteLine("评估表:" + item[0].RawData.Replace(" ", "") + " Value:" + item[1].RawData);
            }
            foreach (var item in PriceTable)
            {
                Console.WriteLine("价格表:" + item[0].RawData.Replace(" ", "") + " Value:" + item[1].RawData);
            }
            foreach (ReorganizationRec item in list)
            {
                Console.WriteLine("抽出:" + item.TargetCompany + item.Target);
            }
        }

        return list;
    }

    // Substrings that disqualify an explain-table target from becoming a record.
    private static readonly string[] IgnoredTargetKeywords = { "发行", "置换", "置出", "置入", "本次", "出售", "购买" };

    // Matches a run of digits enclosed in angle brackets; built once instead of once per target.
    private static readonly Regex AngleNumberPattern = new Regex(@"\<(\d+)\>");

    // Removes trailing occurrences of the literal suffix "合计" from a company name.
    // (TrimEnd with a char array would strip ANY trailing '合' or '计' character and so
    // damage names that merely end in one of those characters.)
    private static string StripTotalSuffix(string name)
    {
        const string totalSuffix = "合计";
        while (name.EndsWith(totalSuffix, StringComparison.Ordinal))
        {
            name = name.Substring(0, name.Length - totalSuffix.Length);
        }
        return name;
    }

    // Splits an explain-table key into its aliases: by Utility.SplitChar, or by '/' when that yields a single piece.
    private static string[] SplitExplainKey(string rawKey)
    {
        var bySplitChar = rawKey.Split(Utility.SplitChar);
        // NOTE(review): the separator list used to contain '/' twice; possibly one of them was
        // meant to be a different (e.g. full-width) slash - confirm against real documents.
        var bySlash = rawKey.Split(new char[] { '/' });
        if (bySplitChar.Length == 1 && bySlash.Length > 1)
        {
            return bySlash;
        }
        return bySplitChar;
    }

    // True for explain-table keys that are generic role words and must not be used as a company alias.
    private static bool IsGenericExplainKey(string key)
    {
        return key.Contains("标的") ||
               key.Contains("目标") ||
               key.Equals("上市公司") ||
               key.Equals("本公司");
    }

    // Returns the part of the text in front of the first "," or the whole text when there is none.
    private static string TakeBeforeComma(string text)
    {
        return text.Contains(",") ? Utility.GetStringBefore(text, ",") : text;
    }

    // Finds the first explain-table entry whose alias or value equals the given name and returns its value/alias (cut at a comma).
    private bool TryResolveExplainNames(string name, out string fullName, out string shortName)
    {
        fullName  = null;
        shortName = null;
        foreach (var entry in ExplainDict)
        {
            foreach (var key in SplitExplainKey(entry.Key))
            {
                if (IsGenericExplainKey(key))
                {
                    continue;
                }
                if (key.Equals(name) || entry.Value.Equals(name))
                {
                    fullName  = TakeBeforeComma(entry.Value);
                    shortName = TakeBeforeComma(key);
                    return true;
                }
            }
        }
        return false;
    }

    // Prints every explain-table alias that mentions the counterparty keyword (diagnostics only).
    private void LogTradeCounterpartyExplainKeys()
    {
        foreach (var item in ExplainDict)
        {
            foreach (var k in SplitExplainKey(item.Key))
            {
                if (k.Contains("交易对方"))
                {
                    Console.WriteLine("交易对方条目:" + k);
                }
            }
        }
    }

    // Overrides the record's counterparty with the keyword-rule results, resolving full/short names when there is exactly one result.
    private void ApplyKeywordTradeCompany(ReorganizationRec reorgRec)
    {
        var xTradeList = getTradeCompanyByKeyWord(reorgRec);
        if (xTradeList.Count > 1)
        {
            reorgRec.TradeCompany = String.Join(Utility.SplitChar, xTradeList);
            return;
        }
        if (xTradeList.Count != 1)
        {
            return;
        }

        // Single result.
        var xTrade = xTradeList.First();
        if (String.IsNullOrEmpty(xTrade))
        {
            return;
        }

        string fullName;
        string shortName;
        if (TryResolveExplainNames(xTrade, out fullName, out shortName))
        {
            reorgRec.TradeCompanyFullName  = fullName;
            reorgRec.TradeCompanyShortName = shortName;
        }

        reorgRec.TradeCompany = xTrade;
        if (!String.IsNullOrEmpty(reorgRec.TradeCompanyFullName) &&
            !String.IsNullOrEmpty(reorgRec.TradeCompanyShortName))
        {
            reorgRec.TradeCompany = reorgRec.TradeCompanyFullName + "|" + reorgRec.TradeCompanyShortName;
        }
    }

    // Sets the record's counterparty from the first explain-table counterparty entry, with name resolution or conjunction splitting.
    private void ApplyExplainTradeCompany(ReorganizationRec reorgRec)
    {
        foreach (var tradeItem in getTradeCompanyByExplain(reorgRec))
        {
            // Fill in the counterparty's full and short name from the explain table.
            string fullName;
            string shortName;
            if (TryResolveExplainNames(tradeItem.Value, out fullName, out shortName))
            {
                reorgRec.TradeCompanyFullName = fullName;
                // Generic role words are not usable as a short name.
                if (!shortName.Equals("交易对方") && !shortName.Equals("发行对象"))
                {
                    reorgRec.TradeCompanyShortName = shortName;
                }
            }

            reorgRec.TradeCompany = tradeItem.Value;
            if (!String.IsNullOrEmpty(reorgRec.TradeCompanyFullName) &&
                !String.IsNullOrEmpty(reorgRec.TradeCompanyShortName))
            {
                reorgRec.TradeCompany = reorgRec.TradeCompanyFullName + "|" + reorgRec.TradeCompanyShortName;
            }
            else
            {
                SplitTrailingConjunction(reorgRec, tradeItem.Value);
            }
            Console.WriteLine("使用释义表的交易对手:" + tradeItem.Key + ":" + reorgRec.TradeCompany);

            // Only the first entry is used.
            break;
        }
    }

    // Splits the last segment of a counterparty list on a conjunction (e.g. "中建六局及中建八局") and stores the re-joined list.
    private static void SplitTrailingConjunction(ReorganizationRec reorgRec, string rawValue)
    {
        var tradeArray = rawValue.Split(Utility.SplitChar).ToList();
        var last       = tradeArray.Last();

        if (last.Contains("以及"))
        {
            ReplaceLastWithHalves(reorgRec, tradeArray, last, "以及");
            return;
        }

        // NOTE(review): when the last segment contains both conjunctions below, the second split
        // is applied to the original text but replaces the element appended by the first split.
        // Kept as-is because the intended result for that case is unclear - confirm.
        if (last.Contains("及"))
        {
            ReplaceLastWithHalves(reorgRec, tradeArray, last, "及");
        }
        if (last.Contains("和"))
        {
            ReplaceLastWithHalves(reorgRec, tradeArray, last, "和");
        }
    }

    // Replaces the final list element with the text before and after the conjunction and writes the joined list to the record.
    private static void ReplaceLastWithHalves(ReorganizationRec reorgRec, List <string> tradeArray, string last, string conjunction)
    {
        tradeArray.RemoveAt(tradeArray.Count - 1);
        tradeArray.Add(Utility.GetStringBefore(last, conjunction));
        tradeArray.Add(Utility.GetStringAfter(last, conjunction));
        reorgRec.TradeCompany = String.Join(Utility.SplitChar, tradeArray);
    }