Пример #1
0
    /// <summary>
    /// Builds a <c>struStockChange</c> from one tab-separated line.
    /// </summary>
    /// <param name="str">
    /// Tab-separated record: id, holder full name, holder short name, then the
    /// optional columns change end date, change price, change number,
    /// hold number after change and hold percent after change.
    /// </param>
    /// <returns>The populated record; optional columns that are absent are left at their default value.</returns>
    /// <exception cref="System.IndexOutOfRangeException">
    /// Thrown when the line has fewer than three columns (the first three are read unconditionally).
    /// </exception>
    internal static struStockChange ConvertFromString(string str)
    {
        var fields = str.Split("\t");
        var c      = new struStockChange();

        // The first three columns are mandatory.
        c.id              = fields[0];
        c.HolderFullName  = fields[1];
        c.HolderShortName = fields[2];

        // Every remaining column is optional and is only read when present.
        if (fields.Length > 3)
        {
            c.ChangeEndDate = fields[3];
        }
        if (fields.Length > 4)
        {
            c.ChangePrice = fields[4];
        }
        if (fields.Length > 5)
        {
            c.ChangeNumber = fields[5];
        }
        if (fields.Length > 6)
        {
            c.HoldNumberAfterChange = fields[6];
        }
        // Was "== 8": a line with a trailing tab or any extra column silently lost
        // this field while columns 3-6 were still accepted. Use the same
        // "more than N columns" test as the other optional fields.
        if (fields.Length > 7)
        {
            c.HoldPercentAfterChange = fields[7];
        }
        return(c);
    }
Пример #2
0
    /// <summary>
    /// Extracts stock-change records from one announcement HTML file.
    /// </summary>
    /// <param name="htmlFileName">Path of the HTML file; its name without ".html" is used as the announcement id.</param>
    /// <returns>
    /// The records produced by <c>ExtractFromTable</c> when there are any; otherwise a list
    /// holding a single record built from the holder name and change end date helpers.
    /// </returns>
    public static List <struStockChange> Extract(string htmlFileName)
    {
        var fileInfo = new System.IO.FileInfo(htmlFileName);
        Program.Logger.WriteLine("Start FileName:[" + fileInfo.Name + "]");

        var node = HTMLEngine.Anlayze(htmlFileName);
        companynamelist = BussinessLogic.GetCompanyNameByCutWord(node);

        // Announcement id: the file name with ".html" stripped.
        var announcementId = fileInfo.Name.Replace(".html", "");

        // Table extraction wins whenever it yields at least one record.
        var records = ExtractFromTable(node, announcementId);
        if (records.Count > 0)
        {
            return records;
        }

        // Fallback: assemble a single record from the helper lookups.
        var fallback = new struStockChange();
        fallback.id = announcementId;
        Program.Logger.WriteLine("公告ID:" + fallback.id);

        var holder = NormalizeCompanyName(GetHolderFullName(node));
        fallback.HolderFullName  = holder.Item1;
        fallback.HolderShortName = holder.Item2;
        fallback.ChangeEndDate   = GetChangeEndDate(node);

        records.Add(fallback);
        return records;
    }
Пример #3
0
    /// <summary>
    /// Extracts stock-change records for the current document.
    /// </summary>
    /// <returns>
    /// The records from <c>ExtractFromTable()</c> when it finds any. Otherwise a list with at most
    /// one record built from the holder name and change end date; that record is only added when
    /// both values are non-empty and the full name does not contain "增持" or "减持".
    /// </returns>
    public List <struStockChange> Extract()
    {
        // The result is not used below; the call is kept as in the original.
        var dateRange = LocateDateRange(root);
        var holder    = GetHolderName();

        // Register the holder as a known company name only when both forms are present.
        bool missingAName = String.IsNullOrEmpty(holder.FullName) || String.IsNullOrEmpty(holder.ShortName);
        if (!missingAName)
        {
            companynamelist.Add(new struCompanyName()
            {
                secFullName  = holder.FullName,
                secShortName = holder.ShortName
            });
        }

        var records = ExtractFromTable();
        if (records.Count > 0)
        {
            // Returning right here slightly improves the score (recall and similar factors).
            return records;
        }

        var fallback = new struStockChange();

        // Announcement id
        fallback.id             = Id;
        fallback.HolderFullName = holder.FullName.NormalizeTextResult();

        // Discard full names longer than the configured maximum.
        int nameLength = EntityWordAnlayzeTool.TrimEnglish(fallback.HolderFullName).Length;
        if (nameLength > ContractTraning.MaxYiFangLength)
        {
            fallback.HolderFullName = String.Empty;
        }
        fallback.HolderShortName = holder.ShortName;
        fallback.ChangeEndDate   = GetChangeEndDate(root);

        // Outside debug mode, an end date that does not parse is blanked.
        DateTime parsedDate;
        bool     dateParses = DateTime.TryParse(fallback.ChangeEndDate, out parsedDate);
        if (!dateParses && !Program.IsDebugMode)
        {
            fallback.ChangeEndDate = String.Empty;
        }

        if (string.IsNullOrEmpty(fallback.HolderFullName) || string.IsNullOrEmpty(fallback.ChangeEndDate))
        {
            return records;
        }
        if (fallback.HolderFullName.Contains("增持") || fallback.HolderFullName.Contains("减持"))
        {
            return records;
        }

        records.Add(fallback);
        return records;
    }
Пример #4
0
    /// <summary>
    /// Serializes a stock-change record as one comma-separated line.
    /// </summary>
    /// <param name="increaseStock">The record to serialize.</param>
    /// <returns>
    /// id, holder full name, holder short name, change end date, followed by the four numeric
    /// fields passed through <c>Normalizer.NormalizeNumberResult</c>; every field, including
    /// the last one, is followed by a comma.
    /// </returns>
    internal static string ConvertToString(struStockChange increaseStock)
    {
        var columns = new string[]
        {
            increaseStock.id,
            increaseStock.HolderFullName,
            increaseStock.HolderShortName,
            increaseStock.ChangeEndDate,
            Normalizer.NormalizeNumberResult(increaseStock.ChangePrice),
            Normalizer.NormalizeNumberResult(increaseStock.ChangeNumber),
            Normalizer.NormalizeNumberResult(increaseStock.HoldNumberAfterChange),
            Normalizer.NormalizeNumberResult(increaseStock.HoldPercentAfterChange)
        };

        // The trailing comma after the last column is part of the format.
        return string.Join(",", columns) + ",";
    }
Пример #5
0
    /// <summary>
    /// Extracts stock-change records from the tables of a parsed announcement.
    /// </summary>
    /// <param name="root">Parsed HTML document that is searched for tables.</param>
    /// <param name="id">Announcement id stamped on every extracted record.</param>
    /// <returns>
    /// Records that have both a holder full name and a change end date. When two records share
    /// the same <c>GetKey()</c> and only one of them carries a hold-number-after-change, the one
    /// without it is dropped.
    /// </returns>
    static List <struStockChange> ExtractFromTable(HTMLEngine.MyRootHtmlNode root, string id)
    {
        // Column-title rules; the order here fixes the cell order (rec[0]..rec[3]) used below.
        var StockHolderRule = new TableSearchRule();

        StockHolderRule.Name = "股东全称";
        StockHolderRule.Rule = new string[] { "股东名称" }.ToList();
        StockHolderRule.IsEq = true;

        var ChangeDateRule = new TableSearchRule();

        ChangeDateRule.Name      = "变动截止日期";
        ChangeDateRule.Rule      = new string[] { "减持期间", "增持期间", "减持时间", "增持时间" }.ToList();
        ChangeDateRule.IsEq      = false;
        ChangeDateRule.Normalize = Normalizer.NormailizeDate;


        var ChangePriceRule = new TableSearchRule();

        ChangePriceRule.Name      = "变动价格";
        ChangePriceRule.Rule      = new string[] { "减持均价", "增持均价" }.ToList();
        ChangePriceRule.IsEq      = false;
        // Keep only the text in front of the "元" currency unit when it is present.
        ChangePriceRule.Normalize = (x, y) =>
        {
            if (x.Contains("元"))
            {
                return(Utility.GetStringBefore(x, "元"));
            }
            return(x);
        };

        var ChangeNumberRule = new TableSearchRule();

        ChangeNumberRule.Name      = "变动数量";
        ChangeNumberRule.Rule      = new string[] { "减持股数", "增持股数" }.ToList();
        ChangeNumberRule.IsEq      = false;
        ChangeNumberRule.Normalize = Normalizer.NormalizerStockNumber;

        var Rules = new List <TableSearchRule>();

        Rules.Add(StockHolderRule);
        Rules.Add(ChangeDateRule);
        Rules.Add(ChangePriceRule);
        Rules.Add(ChangeNumberRule);

        var result = HTMLTable.GetMultiInfo(root, Rules, false);

        // The after-change figures are meant to be written only on the last record, so walk the
        // rows in reverse; records that do not really exist must be filtered out promptly.
        result.Reverse();
        var stockchangelist = new List <struStockChange>();

        foreach (var rec in result)
        {
            var stockchange = new struStockChange();
            stockchange.id = id;
            var Name = NormalizeCompanyName(rec[0].RawData);
            stockchange.HolderFullName  = Name.Item1;
            stockchange.HolderShortName = Name.Item2;
            stockchange.ChangeEndDate   = rec[1].RawData;
            stockchange.ChangePrice     = rec[2].RawData;
            stockchange.ChangeNumber    = rec[3].RawData;
            // NOTE(review): GetHolderAfter is re-evaluated for every row. If it returns a fresh
            // list each time, the Used flag set below never carries over to the next row, and if
            // the element type is a struct the flag is set on a copy only - confirm the intent.
            var holderafterlist = GetHolderAfter(root);
            for (int i = 0; i < holderafterlist.Count; i++)
            {
                var after = holderafterlist[i];
                if (after.Used)
                {
                    continue;
                }
                if (after.Name == stockchange.HolderFullName || after.Name == stockchange.HolderShortName)
                {
                    stockchange.HoldNumberAfterChange  = after.Count;
                    stockchange.HoldPercentAfterChange = after.Percent;
                    after.Used = true;
                    break;
                }
            }
            // Essentially every valid record has a holder name and an end date. Skipping the rest
            // may hurt the very few records without an end date, but helps the overall metrics.
            if (string.IsNullOrEmpty(stockchange.HolderFullName) || string.IsNullOrEmpty(stockchange.ChangeEndDate))
            {
                continue;
            }
            stockchangelist.Add(stockchange);
        }

        // Merge records: among records with the same key, mark the one lacking the
        // hold-number-after-change for removal by blanking its id.
        for (int i = 0; i < stockchangelist.Count; i++)
        {
            var x = stockchangelist[i];
            for (int j = i + 1; j < stockchangelist.Count; j++)
            {
                var y = stockchangelist[j];
                if (x.GetKey() != y.GetKey())
                {
                    continue;
                }

                bool xHasAfter = !string.IsNullOrEmpty(x.HoldNumberAfterChange);
                bool yHasAfter = !string.IsNullOrEmpty(y.HoldNumberAfterChange);

                if (!xHasAfter && yHasAfter)
                {
                    x.id = "";
                    // x is a local copy when struStockChange is a value type; without storing it
                    // back the blanked id is lost and the filter below never drops the record.
                    stockchangelist[i] = x;
                }
                if (xHasAfter && !yHasAfter)
                {
                    y.id = "";
                    // Same write-back for the later record.
                    stockchangelist[j] = y;
                }
            }
        }
        return(stockchangelist.Where((x) => { return !String.IsNullOrEmpty(x.id); }).ToList());
    }
Пример #6
0
    /// <summary>
    /// Extracts stock-change records from the tables of a parsed announcement and attaches the
    /// after-change holding figures to the latest record of each holder.
    /// </summary>
    /// <param name="root">Parsed HTML document passed to <c>HTMLTable.GetMultiInfo</c>.</param>
    /// <param name="id">Announcement id stamped on every extracted record.</param>
    /// <returns>
    /// Records that have a holder full name and a change end date and whose change number and
    /// change price are not "0". An empty list is returned when no table rows match, or when the
    /// fallback search matches but no holder name can be found.
    /// </returns>
    List <struStockChange> ExtractFromTable(HTMLEngine.MyRootHtmlNode root, string id)
    {
        // Column-title rules; the order here fixes the cell order (rec[0]..rec[3]) used below.
        var StockHolderRule = new TableSearchRule();

        StockHolderRule.Name      = "股东全称";
        StockHolderRule.Title     = new string[] { "股东名称", "名称", "增持主体", "增持人", "减持主体", "减持人" }.ToList();
        StockHolderRule.IsTitleEq = true;
        StockHolderRule.IsRequire = true;

        var ChangeDateRule = new TableSearchRule();

        ChangeDateRule.Name  = "变动截止日期";
        ChangeDateRule.Title = new string[] { "买卖时间", "日期", "减持期间", "增持期间", "减持股份期间", "增持股份期间",
                                              "减持时间", "增持时间", "减持股份时间", "增持股份时间" }.ToList();
        ChangeDateRule.IsTitleEq = false;
        ChangeDateRule.Normalize = NormailizeEndChangeDate;


        var ChangePriceRule = new TableSearchRule();

        ChangePriceRule.Name      = "变动价格";
        ChangePriceRule.Title     = new string[] { "成交均价", "减持价格", "增持价格", "减持均", "增持均" }.ToList();
        ChangePriceRule.IsTitleEq = false;
        // Keep only the text in front of the "元" currency unit when it is present.
        ChangePriceRule.Normalize = (x, y) =>
        {
            if (x.Contains("元"))
            {
                return(Utility.GetStringBefore(x, "元"));
            }
            return(x);
        };

        var ChangeNumberRule = new TableSearchRule();

        ChangeNumberRule.Name      = "变动数量";
        ChangeNumberRule.Title     = new string[] { "成交数量", "减持股数", "增持股数", "减持数量", "增持数量" }.ToList();
        ChangeNumberRule.IsTitleEq = false;
        ChangeNumberRule.Normalize = NumberUtility.NormalizerStockNumber;


        var Rules = new List <TableSearchRule>();

        Rules.Add(StockHolderRule);
        Rules.Add(ChangeDateRule);
        Rules.Add(ChangePriceRule);
        Rules.Add(ChangeNumberRule);

        var result = HTMLTable.GetMultiInfo(root, Rules, false);

        if (result.Count == 0)
        {
            // No data was extracted at all: retry without the holder column, requiring the
            // date column instead, and take the holder name from GetHolderName.
            Rules.Clear();
            ChangeDateRule.IsRequire = true;
            Rules.Add(ChangeDateRule);
            Rules.Add(ChangePriceRule);
            Rules.Add(ChangeNumberRule);
            result = HTMLTable.GetMultiInfo(root, Rules, false);
            if (result.Count == 0)
            {
                return(new List <struStockChange>());
            }
            var NewResult = new List <CellInfo[]>();
            // NOTE(review): this reads this.root rather than the root parameter - confirm
            // that both always refer to the same document.
            var Name      = GetHolderName(this.root);
            if (String.IsNullOrEmpty(Name.FullName) && String.IsNullOrEmpty(Name.ShortName))
            {
                return(new List <struStockChange>());
            }
            // Rebuild each row in the four-cell layout (holder, date, price, number) by
            // prepending a synthetic holder cell; the full name is preferred over the short one.
            foreach (var item in result)
            {
                NewResult.Add(new CellInfo[]
                              { new CellInfo()
                                {
                                    RawData = String.IsNullOrEmpty(Name.FullName)?Name.ShortName:Name.FullName
                                }, item[0], item[1], item[2] });
            }
            result = NewResult;
        }

        var holderafterlist = GetHolderAfter();

        var stockchangelist = new List <struStockChange>();

        foreach (var rec in result)
        {
            var stockchange = new struStockChange();
            stockchange.id = id;

            var ModifyName = rec[0].RawData;
            // A long name inside a table may have been cut off by a page break.
            // Verify it here against the totals (holder-after) table.
            if (!holderafterlist.Select((z) => { return(z.Name); }).ToList().Contains(ModifyName))
            {
                foreach (var item in holderafterlist)
                {
                    if (item.Name.EndsWith("先生"))
                    {
                        break;                            // Special-case handling; there is no real logic behind it
                    }
                    // Adopt the first totals-table name that strictly extends the table name
                    // at its start or at its end.
                    if (item.Name.StartsWith(ModifyName) && !item.Name.Equals(ModifyName))
                    {
                        ModifyName = item.Name;
                        break;
                    }
                    if (item.Name.EndsWith(ModifyName) && !item.Name.Equals(ModifyName))
                    {
                        ModifyName = item.Name;
                        break;
                    }
                }
            }


            var Name = CompanyNameLogic.NormalizeCompanyName(this, ModifyName);
            stockchange.HolderFullName  = Name.FullName.NormalizeTextResult();
            stockchange.HolderShortName = Name.ShortName;

            // When the full name still embeds a "简称" (short name) clause, split it: the short
            // name is the text after "简称" with ")" and curly quotes removed, and the full name
            // is the text before "(".
            if (stockchange.HolderFullName.Contains("简称"))
            {
                stockchange.HolderShortName = Utility.GetStringAfter(stockchange.HolderFullName, "简称");
                stockchange.HolderShortName = stockchange.HolderShortName.Replace(")", String.Empty).Replace("“", String.Empty).Replace("”", String.Empty);
                stockchange.HolderFullName  = Utility.GetStringBefore(stockchange.HolderFullName, "(");
            }

            stockchange.ChangeEndDate = rec[1].RawData;

            DateTime x;
            if (!DateTime.TryParse(stockchange.ChangeEndDate, out x))
            {
                // The date cannot be parsed
                if (!Program.IsDebugMode)
                {
                    // Outside debug mode the unparseable date is blanked
                    stockchange.ChangeEndDate = String.Empty;
                }
            }

            if (!String.IsNullOrEmpty(rec[2].RawData))
            {
                // Drop prices that are expressed as a range
                if (!(rec[2].RawData.Contains("-") || rec[2].RawData.Contains("~") || rec[2].RawData.Contains("至")))
                {
                    stockchange.ChangePrice = rec[2].RawData.Replace(" ", String.Empty);
                    stockchange.ChangePrice = stockchange.ChangePrice.NormalizeNumberResult();
                }
            }
            // Anything RegularTool.IsUnsign rejects is blanked; non-empty rejects are reported.
            if (!RegularTool.IsUnsign(stockchange.ChangePrice))
            {
                if (!String.IsNullOrEmpty(stockchange.ChangePrice))
                {
                    Console.WriteLine("Error ChangePrice:[" + stockchange.ChangePrice + "]");
                }
                stockchange.ChangePrice = String.Empty;
            }


            if (!String.IsNullOrEmpty(rec[3].RawData))
            {
                stockchange.ChangeNumber = rec[3].RawData.Replace(" ", String.Empty);
                stockchange.ChangeNumber = stockchange.ChangeNumber.NormalizeNumberResult();
                if (!RegularTool.IsUnsign(stockchange.ChangeNumber))
                {
                    if (!String.IsNullOrEmpty(stockchange.ChangeNumber))
                    {
                        Console.WriteLine("Error ChangeNumber:[" + stockchange.ChangeNumber + "]");
                    }
                    stockchange.ChangeNumber = String.Empty;
                }
            }

            // Essentially every valid record has a holder name and an end date. Skipping the rest
            // may hurt the very few records without an end date, but helps the overall metrics.
            if (string.IsNullOrEmpty(stockchange.HolderFullName) || string.IsNullOrEmpty(stockchange.ChangeEndDate))
            {
                continue;
            }
            // Records whose number or price is exactly "0" are skipped as well.
            if (stockchange.ChangeNumber == "0" || stockchange.ChangePrice == "0")
            {
                continue;
            }
            stockchangelist.Add(stockchange);
        }


        // Collect all distinct holder full names
        var namelist = stockchangelist.Select(x => x.HolderFullName).Distinct().ToList();
        var newRec   = new List <struStockChange>();

        foreach (var name in namelist)
        {
            // Per holder, pick the record with the greatest end date (string comparison).
            var stocklist = stockchangelist.Where((x) => { return(x.HolderFullName == name); }).ToList();
            stocklist.Sort((x, y) => { return(x.ChangeEndDate.CompareTo(y.ChangeEndDate)); });
            var last = stocklist.Last();
            // NOTE(review): there is no break after a match, so a holder matched by more than one
            // totals entry is removed/added once per match - confirm this is intended.
            for (int i = 0; i < holderafterlist.Count; i++)
            {
                var after = holderafterlist[i];
                // NOTE(review): if the element type is a struct this assignment only changes the
                // local copy; the comparison below still sees the space-stripped name.
                after.Name = after.Name.Replace(" ", "");
                if (after.Name == last.HolderFullName || after.Name == last.HolderShortName)
                {
                    stockchangelist.Remove(last);   // It is a struct and cannot be modified in place!! Use remove-then-add instead
                    last.HoldNumberAfterChange  = after.Count;
                    last.HoldPercentAfterChange = after.Percent;
                    newRec.Add(last);
                }
            }
        }

        // Log a hint when the totals table and the extracted holders disagree in number.
        if (holderafterlist.Count != namelist.Count)
        {
            if (!Program.IsMultiThreadMode)
            {
                Program.Logger.WriteLine("增持者数量确认!");
            }
        }

        // Re-add the updated "last" records that were removed above.
        stockchangelist.AddRange(newRec);
        return(stockchangelist);
    }
Пример #7
0
    /// <summary>
    /// Extracts stock-change records from the tables of a parsed announcement.
    /// </summary>
    /// <param name="root">Parsed HTML document that is searched for tables.</param>
    /// <param name="id">Announcement id stamped on every extracted record.</param>
    /// <returns>
    /// One record per matched table row, in reverse row order, each enriched with the
    /// after-change holding figures of the first unused holder entry whose name matches.
    /// </returns>
    static List <struStockChange> ExtractFromTable(HTMLEngine.MyRootHtmlNode root, string id)
    {
        // Column-title rules; their order fixes the cell order (row[0]..row[3]) used below.
        var holderRule = new TableSearchRule
        {
            Name = "股东全称",
            Rule = new[] { "股东名称" }.ToList(),
            IsEq = true
        };

        var dateRule = new TableSearchRule
        {
            Name      = "变动截止日期",
            Rule      = new[] { "减持期间", "增持期间", "减持时间", "增持时间" }.ToList(),
            IsEq      = false,
            Normalize = Normalizer.NormailizeDate
        };

        var priceRule = new TableSearchRule
        {
            Name      = "变动价格",
            Rule      = new[] { "减持均价", "增持均价" }.ToList(),
            IsEq      = false,
            // Keep only the text in front of the "元" currency unit when it is present.
            Normalize = (text, title) => text.Contains("元") ? Utility.GetStringBefore(text, "元") : text
        };

        var numberRule = new TableSearchRule
        {
            Name      = "变动数量",
            Rule      = new[] { "减持股数", "增持股数" }.ToList(),
            IsEq      = false,
            Normalize = Normalizer.NormalizerStockNumber
        };

        var rules = new List <TableSearchRule> { holderRule, dateRule, priceRule, numberRule };

        var rows = HTMLTable.GetMultiInfo(root, rules, false);

        // The after-change figures are meant to be written only on the last record, so walk the
        // rows in reverse; records that do not really exist must be filtered out promptly.
        rows.Reverse();

        var extracted = new List <struStockChange>();

        foreach (var row in rows)
        {
            var holder = NormalizeCompanyName(row[0].RawData);
            var record = new struStockChange
            {
                id              = id,
                HolderFullName  = holder.Item1,
                HolderShortName = holder.Item2,
                ChangeEndDate   = row[1].RawData,
                ChangePrice     = row[2].RawData,
                ChangeNumber    = row[3].RawData
            };

            // Take the figures from the first unused holder entry matching either name form.
            var afterEntries = GetHolderAfter(root);
            for (int k = 0; k < afterEntries.Count; k++)
            {
                var after = afterEntries[k];
                if (!after.Used && (after.Name == record.HolderFullName || after.Name == record.HolderShortName))
                {
                    record.HoldNumberAfterChange  = after.Count;
                    record.HoldPercentAfterChange = after.Percent;
                    after.Used = true;
                    break;
                }
            }

            extracted.Add(record);
        }

        return extracted;
    }