Example #1
0
    /// <summary>
    /// Extracts (target, company) pairs from the explanation table (<c>ExplainDict</c>).
    /// </summary>
    /// <remarks>
    /// The keys in <paramref name="ExplainKeys"/> are tried in order, and for each key the
    /// entries of <c>ExplainDict</c> are scanned in order. The search stops at the first
    /// entry that matches the key and yields at least one pair.
    /// For the keys "交易标的", "标的资产" and "标的公司", an entry whose whole value equals a
    /// name in <c>companynamelist</c> is reported as a single ("100%股权", value) pair,
    /// which is also written to the console.
    /// </remarks>
    /// <param name="CompanyAtExplainTable">Companies whose short and full names are used to recognise the company part of an entry.</param>
    /// <param name="ExplainKeys">Explanation-table keys to look up, in priority order.</param>
    /// <returns>The extracted pairs without duplicates; empty when nothing matched.</returns>
    private List <(string Target, string Company)> ExtractTargetFromExplainTable(List <struCompanyName> CompanyAtExplainTable, string[] ExplainKeys)
    {
        // Every non-empty short and full name may lead an equity expression.
        var AllCompanyName = new List <String>();

        foreach (var item in CompanyAtExplainTable)
        {
            if (!String.IsNullOrEmpty(item.secShortName))
            {
                AllCompanyName.Add(item.secShortName);
            }
            if (!String.IsNullOrEmpty(item.secFullName))
            {
                AllCompanyName.Add(item.secFullName);
            }
        }

        // Equity extraction: a company name, the RegularTool.PercentExpress pattern, then an equity suffix.
        var targetRegular = new ExtractProperyBase.struRegularExpressFeature()
        {
            LeadingWordList  = AllCompanyName,
            RegularExpress   = RegularTool.PercentExpress,
            TrailingWordList = new string[] { "的股权", "股权", "的权益", "权益", "的股份", "股份" }.ToList()
        };

        // Other (non-equity) targets. The first entry contained in the text wins, so the order matters.
        var OtherTargets = new string[] { "资产及负债", "资产和负债",
                                          "主要资产和部分负债", "主要资产及部分负债",
                                          "经营性资产及负债", "经营性资产和负债", "应收账款和其他应收款",
                                          "负债", "债权", "全部权益", "经营性资产", "非股权类资产", "资产、负债、业务",
                                          "直属资产", "普通股股份", "土地使用权", "使用权", "房产" };

        var TargetAndCompanyList = new List <(string Target, string Company)>();

        // "Held by" markers, tried in this order, each against the result of the previous step.
        var holderMarkers = new string[] { "持有的", "持有", "所持" };

        // Keeps only the text that Utility.GetStringAfter returns for each marker found.
        string StripHolderPrefix(string text)
        {
            foreach (var marker in holderMarkers)
            {
                if (text.Contains(marker))
                {
                    text = Utility.GetStringAfter(text, marker);
                }
            }
            return(text);
        }

        // Adds a pair unless an equal pair has already been collected.
        void AddUnique((string Target, string Company) pair)
        {
            if (!TargetAndCompanyList.Contains(pair))
            {
                TargetAndCompanyList.Add(pair);
            }
        }

        foreach (var Rplkey in ExplainKeys)
        {
            // The most likely entries come first.
            foreach (var ExplainDictItem in ExplainDict)
            {
                var keys  = ExplainDictItem.Key.Split(Utility.SplitChar);
                // NOTE(review): both separators below are the same character; the second one
                // may have been meant to be a different slash variant - confirm against the data.
                var keys2 = ExplainDictItem.Key.Split(new char[] { '/', '/' });
                if (keys.Length == 1 && keys2.Length > 1)
                {
                    keys = keys2;
                }
                var values  = ExplainDictItem.Value.Split(Utility.SplitChar);
                var values2 = ExplainDictItem.Value.Split(";");
                if (values.Length == 1 && values2.Length > 1)
                {
                    values = values2;
                }

                // A key may start with the character "拟", which has to be dropped before comparing.
                var SearchKey = keys.Select(x => x.StartsWith("拟", StringComparison.Ordinal) ? x.Substring(1) : x)
                                .Select(x => x.Trim())
                                .ToArray();
                if (!SearchKey.Contains(Rplkey))
                {
                    continue;
                }

                if (Rplkey.Equals("交易标的") || Rplkey.Equals("标的资产") || Rplkey.Equals("标的公司"))
                {
                    // The whole value is a known company name: report it as a 100% equity target.
                    foreach (var cn in companynamelist)
                    {
                        if (ExplainDictItem.Value.Equals(cn.secFullName) ||
                            ExplainDictItem.Value.Equals(cn.secShortName))
                        {
                            TargetAndCompanyList.Add(("100%股权", ExplainDictItem.Value));
                            Console.WriteLine(Id + ":100%股权" + ExplainDictItem.Value);
                            return(TargetAndCompanyList);
                        }
                    }
                }

                foreach (var targetRecordItem in values)
                {
                    foreach (var SingleItem in Utility.CutByPOSConection(targetRecordItem))
                    {
                        // Normalise: no surrounding whitespace, no inner spaces, no "合计", no holder prefix.
                        var targetAndcompany = StripHolderPrefix(SingleItem.Trim().Replace(" ", "").Replace("合计", ""));

                        // Separate the company name from the transaction target.
                        var ExpResult = ExtractPropertyByHTML.RegularExFinder(0, targetAndcompany, targetRegular, "|");
                        if (ExpResult.Count != 0)
                        {
                            foreach (var r in ExpResult)
                            {
                                // Part 0 is used as the company, parts 1 and 2 together as the target.
                                var arr           = r.Value.Split("|");
                                var target        = arr[1] + arr[2];
                                var targetCompany = StripHolderPrefix(arr[0]);
                                AddUnique((target.Replace(" ", ""), targetCompany.Replace(" ", "")));
                            }
                            continue;
                        }

                        // Other kinds of targets: first ask the explanation table for an owner (looked up once).
                        var otherOwner = GetOtherOwnerByExplainTable(targetAndcompany);
                        if (!String.IsNullOrEmpty(otherOwner))
                        {
                            AddUnique((targetAndcompany, otherOwner));
                            continue;
                        }

                        // The matching target phrase does not depend on the company, so find it once.
                        var otherTarget = OtherTargets.FirstOrDefault(ot => targetAndcompany.Contains(ot));

                        foreach (var rc in CompanyAtExplainTable)
                        {
                            var hasFullName  = !String.IsNullOrEmpty(rc.secFullName);
                            var hasShortName = !String.IsNullOrEmpty(rc.secShortName);

                            // The asset description may carry a company name.
                            var mentionsCompany = (hasFullName && targetAndcompany.Contains(rc.secFullName)) ||
                                                  (hasShortName && targetAndcompany.Contains(rc.secShortName));
                            if (otherTarget != null && mentionsCompany)
                            {
                                // Report the full name; fall back to the short name when no full name is recorded.
                                AddUnique((otherTarget, hasFullName ? rc.secFullName : rc.secShortName));
                            }

                            // "<company><target>" form; only used while nothing has been collected yet.
                            // Ordinal comparison keeps the prefix test consistent with Substring(Length).
                            if (TargetAndCompanyList.Count == 0 && hasFullName && targetAndcompany.StartsWith(rc.secFullName, StringComparison.Ordinal))
                            {
                                AddUnique((targetAndcompany.Substring(rc.secFullName.Length), rc.secFullName));
                                break;
                            }
                            if (TargetAndCompanyList.Count == 0 && hasShortName && targetAndcompany.StartsWith(rc.secShortName, StringComparison.Ordinal))
                            {
                                AddUnique((targetAndcompany.Substring(rc.secShortName.Length), rc.secShortName));
                                break;
                            }
                        }
                    }
                }

                // The first matching entry that produced anything ends the search.
                if (TargetAndCompanyList.Count != 0)
                {
                    return(TargetAndCompanyList);
                }
            }
        }
        return(TargetAndCompanyList);
    }
Example #2
0
    /// <summary>
    /// Extracts (target, company) pairs from the explanation table (<c>ExplainDict</c>).
    /// </summary>
    /// <remarks>
    /// The keys in <paramref name="ExplainKeys"/> are tried in order, and for each key the
    /// entries of <c>ExplainDict</c> are scanned in order. The search stops at the first
    /// entry that matches the key and yields at least one pair.
    /// </remarks>
    /// <param name="CompanyAtExplainTable">Companies whose short and full names are used to recognise the company part of an entry.</param>
    /// <param name="ExplainKeys">Explanation-table keys to look up, in priority order.</param>
    /// <returns>The extracted pairs without duplicates; empty when nothing matched.</returns>
    private List <(string Target, string Comany)> ExtractFromExplainTable(List <struCompanyName> CompanyAtExplainTable, string[] ExplainKeys)
    {
        // Every non-empty short and full name may lead an equity expression.
        var AllCompanyName = new List <String>();

        foreach (var company in CompanyAtExplainTable)
        {
            if (!String.IsNullOrEmpty(company.secShortName))
            {
                AllCompanyName.Add(company.secShortName);
            }
            if (!String.IsNullOrEmpty(company.secFullName))
            {
                AllCompanyName.Add(company.secFullName);
            }
        }

        // Equity extraction: a company name, the RegularTool.PercentExpress pattern, then an equity suffix.
        var targetRegular = new ExtractProperyBase.struRegularExpressFeature()
        {
            LeadingWordList  = AllCompanyName,
            RegularExpress   = RegularTool.PercentExpress,
            TrailingWordList = new string[] { "的股权", "股权", "的权益", "权益" }.ToList()
        };

        // Non-equity targets. The first entry contained in the text wins.
        var OtherTargets = new string[] { "资产及负债", "直属资产" };

        var TargetAndCompanyList = new List <(string Target, string Comany)>();

        // Adds a pair unless an equal pair has already been collected.
        void AddUnique((string Target, string Comany) pair)
        {
            if (!TargetAndCompanyList.Contains(pair))
            {
                TargetAndCompanyList.Add(pair);
            }
        }

        foreach (var Rplkey in ExplainKeys)
        {
            // The most likely entries come first.
            foreach (var item in ExplainDict)
            {
                var keys  = item.Key.Split(Utility.SplitChar);
                var keys2 = item.Key.Split("/");
                if (keys.Length == 1 && keys2.Length > 1)
                {
                    keys = keys2;
                }
                var values  = item.Value.Split(Utility.SplitChar);
                var values2 = item.Value.Split(";");
                if (values.Length == 1 && values2.Length > 1)
                {
                    values = values2;
                }

                // A key may start with the character "拟", which has to be dropped before comparing.
                var SearchKey = keys.Select(x => x.StartsWith("拟", StringComparison.Ordinal) ? x.Substring(1) : x)
                                .Select(x => x.Trim())
                                .ToArray();
                if (!SearchKey.Contains(Rplkey))
                {
                    continue;
                }

                foreach (var targetRecordItem in values)
                {
                    foreach (var SingleItem in Utility.CutByPOSConection(targetRecordItem))
                    {
                        // Normalise: no surrounding whitespace and no inner spaces.
                        var targetAndcompany = SingleItem.Trim().Replace(" ", "");

                        // Separate the company name from the transaction target.
                        var ExpResult = ExtractPropertyByHTML.RegularExFinder(0, targetAndcompany, targetRegular, "|");
                        if (ExpResult.Count != 0)
                        {
                            foreach (var r in ExpResult)
                            {
                                // Part 0 is used as the company, parts 1 and 2 together as the target.
                                var arr = r.Value.Split("|");
                                AddUnique((arr[1] + arr[2], arr[0]));
                            }
                            continue;
                        }

                        // Other kinds of targets. The matching phrase does not depend on the
                        // company, so it is looked up once per item.
                        var otherTarget = OtherTargets.FirstOrDefault(ot => targetAndcompany.Contains(ot));

                        foreach (var rc in CompanyAtExplainTable)
                        {
                            var hasFullName  = !String.IsNullOrEmpty(rc.secFullName);
                            var hasShortName = !String.IsNullOrEmpty(rc.secShortName);

                            if (otherTarget != null)
                            {
                                // Tuple order is (Target, Comany): the target phrase goes first,
                                // the company name second. The full name takes precedence.
                                if (hasFullName && targetAndcompany.Contains(rc.secFullName))
                                {
                                    AddUnique((otherTarget, rc.secFullName));
                                }
                                else if (hasShortName && targetAndcompany.Contains(rc.secShortName))
                                {
                                    AddUnique((otherTarget, rc.secShortName));
                                }
                            }

                            // "<company><target>" form; only used while nothing has been collected yet.
                            // The remainder is cut from the same normalised string the prefix was
                            // tested on, so the offset is always right. Ordinal comparison keeps the
                            // prefix test consistent with Substring(Length).
                            if (TargetAndCompanyList.Count == 0 && hasFullName && targetAndcompany.StartsWith(rc.secFullName, StringComparison.Ordinal))
                            {
                                AddUnique((targetAndcompany.Substring(rc.secFullName.Length), rc.secFullName));
                                break;
                            }
                            if (TargetAndCompanyList.Count == 0 && hasShortName && targetAndcompany.StartsWith(rc.secShortName, StringComparison.Ordinal))
                            {
                                AddUnique((targetAndcompany.Substring(rc.secShortName.Length), rc.secShortName));
                                break;
                            }
                        }
                    }
                }

                // The first matching entry that produced anything ends the search.
                if (TargetAndCompanyList.Count != 0)
                {
                    return(TargetAndCompanyList);
                }
            }
        }
        return(TargetAndCompanyList);
    }