コード例 #1
0
ファイル: Contract.cs プロジェクト: ReadyThalia/FDDC
    /// <summary>
    /// Picks the name of the contract's Party B ("乙方") from the document.
    /// </summary>
    /// <remarks>
    /// Sources are tried in order and the first hit wins:
    /// 1. the first candidate produced for the leading keyword "供应商名称:";
    /// 2. the first candidate, after reversing the list, produced for the trailing
    ///    keyword "有限公司董事会", with "有限公司" appended again;
    /// 3. the last entry of <c>companynamelist</c>.
    /// </remarks>
    /// <param name="root">Parsed document handed to the extractor.</param>
    /// <returns>The selected name, or an empty string when no source yields one.</returns>
    static string GetYiFang(HTMLEngine.MyRootHtmlNode root)
    {
        // Pass 1: text that follows the supplier-name keyword.
        var Extractor = new ExtractProperty();
        Extractor.LeadingWordList = new string[] { "供应商名称:" };
        Extractor.Extract(root);
        foreach (var item in Extractor.CandidateWord)
        {
            // This method extracts Party B, so the log label is "乙方"
            // (it previously said "甲方", i.e. Party A).
            Program.Logger.WriteLine("乙方候补词(关键字):[" + item + "]");
            return(item);
        }

        // Pass 2: text that precedes "有限公司董事会".
        Extractor = new ExtractProperty();
        Extractor.TrailingWordList = new string[] { "有限公司董事会" };
        Extractor.Extract(root);
        // Reversed so that the candidate found last is tried first.
        Extractor.CandidateWord.Reverse();
        foreach (var item in Extractor.CandidateWord)
        {
            Program.Logger.WriteLine("乙方候补词(关键字):[" + item + "有限公司]");
            return(item + "有限公司");
        }

        // Pass 3: fall back to the last company name collected elsewhere.
        if (companynamelist.Count > 0)
        {
            return(companynamelist[companynamelist.Count - 1].secFullName);
        }

        return("");
    }
コード例 #2
0
ファイル: Contract.cs プロジェクト: kimmow/FDDC
    /// <summary>
    /// Extracts the contract amount from the text that follows the amount keywords.
    /// </summary>
    /// <param name="root">Parsed document handed to the extractor.</param>
    /// <returns>
    /// The amount of the first entry whose second tuple item is "人民币" or "元";
    /// if that yields an empty string, the amount of the very first entry;
    /// an empty string when no amount was found at all.
    /// </returns>
    static string GetMoney(HTMLEngine.MyRootHtmlNode root)
    {
        var extractor = new EntityProperty();
        // Candidates are the texts following these keywords.
        extractor.LeadingWordList = new string[] { "中标金额", "中标价", "合同金额", "合同总价", "订单总金额" };
        extractor.Extract(root);

        // Gather every amount found in any candidate text.
        var found = new List <Tuple <String, String> >();
        foreach (var text in extractor.CandidateWord)
        {
            found.AddRange(Utility.SeekMoney(text));
        }
        if (found.Count == 0)
        {
            return("");
        }

        // Prefer the first entry tagged "人民币" / "元".
        var amount = "";
        foreach (var entry in found)
        {
            var tag = entry.Item2;
            if (tag != "人民币" && tag != "元")
            {
                continue;
            }
            amount = entry.Item1;
            break;
        }

        // Otherwise fall back to the first entry, whatever its tag.
        if (amount == "")
        {
            amount = found[0].Item1;
        }

        Program.Logger.WriteLine("金额候补词:[" + amount + "]");
        return(amount);
    }
コード例 #3
0
    /// <summary>
    /// Reads the subscriber rows from the document's tables and turns each one
    /// into a <c>struIncreaseStock</c> record.
    /// </summary>
    /// <param name="root">Parsed document whose tables are searched.</param>
    /// <param name="SampleincreaseStock">Template record; its <c>id</c> and <c>BuyMethod</c> are copied into every result.</param>
    /// <returns>One record per table row that has a non-empty subscriber cell.</returns>
    static List <struIncreaseStock> GetMultiTarget(HTMLEngine.MyRootHtmlNode root,
                                                   struIncreaseStock SampleincreaseStock)
    {
        // Subscriber column: the header must equal one of the titles.
        // Not matched here: "投资者名称", "股东名称".
        var subscriberRule = new TableSearchRule
        {
            Name = "认购对象",
            Rule = new string[] { "发行对象", "认购对象", "发行对象名称" }.ToList(),
            IsEq = true
        };

        // Share count column: a header containing one of the titles is enough.
        var shareCountRule = new TableSearchRule
        {
            Name      = "增发数量",
            Rule      = new string[] { "配售股数", "认购数量", "认购股份数" }.ToList(),
            IsEq      = false,
            Normalize = Normalizer.NormalizerStockNumber
        };

        // Amount column: containment match.
        var amountRule = new TableSearchRule
        {
            Name      = "增发金额",
            Rule      = new string[] { "配售金额", "认购金额" }.ToList(),
            IsEq      = false,
            Normalize = Normalizer.NormalizerMoney
        };

        // Lock-up period column: containment match.
        var lockupRule = new TableSearchRule
        {
            Name      = "锁定期",
            Rule      = new string[] { "锁定期", "限售期" }.ToList(),
            IsEq      = false,
            Normalize = NormalizerFreezeYear
        };

        // The order here fixes the cell index used when reading each row below.
        var searchRules = new List <TableSearchRule> { subscriberRule, shareCountRule, amountRule, lockupRule };

        var increaseStocklist = new List <struIncreaseStock>();
        foreach (var cells in HTMLTable.GetMultiInfo(root, searchRules, true))
        {
            var subscriber = cells[0].RawData;
            if (String.IsNullOrEmpty(subscriber))
            {
                // A row without a subscriber name is not a record.
                continue;
            }

            increaseStocklist.Add(new struIncreaseStock
            {
                id             = SampleincreaseStock.id,
                BuyMethod      = SampleincreaseStock.BuyMethod,
                PublishTarget  = subscriber,
                IncreaseNumber = cells[1].RawData,
                IncreaseMoney  = cells[2].RawData,
                FreezeYear     = cells[3].RawData
            });
        }
        return(increaseStocklist);
    }
コード例 #4
0
    /// <summary>
    /// Reports the purchase method as "现金" when the keyword "现金认购" is found in the document.
    /// </summary>
    /// <param name="root">Parsed document that is searched for the keyword.</param>
    /// <returns>"现金" when at least one hit is reported, otherwise an empty string.</returns>
    static string getBuyMethod(HTMLEngine.MyRootHtmlNode root)
    {
        // Number of entries FindWordCnt returns for the keyword.
        var hitCount = EntityProperty.FindWordCnt("现金认购", root).Count;

        Program.Logger.WriteLine("现金认购(文本):" + hitCount);
        return(hitCount > 0 ? "现金" : "");
    }
コード例 #5
0
    /// <summary>
    /// Reports the purchase method as "现金" when the keyword "现金认购" is found in the document.
    /// </summary>
    /// <param name="root">Parsed document that is searched for the keyword.</param>
    /// <returns>"现金" when the reported count is positive, otherwise an empty string.</returns>
    static string getBuyMethod(HTMLEngine.MyRootHtmlNode root)
    {
        // Count reported by the extractor for the keyword.
        var hitCount = new ExtractProperty().FindWordCnt("现金认购", root);

        Program.Logger.WriteLine("现金认购(文本):" + hitCount);
        return(hitCount > 0 ? "现金" : "");
    }
コード例 #6
0
ファイル: IncreaseStock.cs プロジェクト: toby2o12/FDDC
    /// <summary>
    /// Determines the purchase method by mapping the keyword "现金认购" to "现金".
    /// </summary>
    /// <param name="root">Not referenced by the body; extraction runs against this instance.</param>
    /// <returns>The mapped results joined with <c>Utility.SplitChar</c>.</returns>
    string getBuyMethod(HTMLEngine.MyRootHtmlNode root)
    {
        var property = new EntityProperty();

        // A hit on "现金认购" is reported as "现金".
        property.KeyWordMap.Add("现金认购", "现金");
        property.Extract(this);

        // Join once and reuse the text for both the log line and the return value.
        var joined = string.Join(Utility.SplitChar, property.WordMapResult);
        if (!Program.IsMultiThreadMode)
        {
            Program.Logger.WriteLine("认购方式:" + joined);
        }
        return(joined);
    }
コード例 #7
0
ファイル: HTMLTableSearch.cs プロジェクト: lxxwin/FDDC
    /// <summary>
    /// Collects every table row that has at least one cell matching the content rule.
    /// </summary>
    /// <param name="root">Parsed document; its TableList is read with table numbers 1..Count.</param>
    /// <param name="rule">
    /// Content rule: <c>Content</c> holds the strings to look for and <c>IsContentEq</c>
    /// selects equality or containment matching. A null <c>Content</c> matches nothing.
    /// </param>
    /// <returns>
    /// One cell list per matching row, built by <c>ConvertRowToCellInfo</c>;
    /// a row is added at most once even if several of its cells match.
    /// </returns>
    public static List <List <CellInfo> > GetMultiRowsByContentRule(HTMLEngine.MyRootHtmlNode root, TableSearchContentRule rule)
    {
        var Container = new List <List <CellInfo> >();

        for (int tableNo = 1; tableNo <= root.TableList.Count; tableNo++)
        {
            var table     = new HTMLTable(root.TableList[tableNo]);
            var RowHeader = table.GetRow(1);
            // Row numbers start at 1 (row 1 is used as the header above), so the
            // final row is number RowCount; the bound is inclusive so that the
            // last row is examined too.
            for (int RowNo = 1; RowNo <= table.RowCount; RowNo++)
            {
                var row     = table.GetRow(RowNo);
                var IsMatch = false;
                foreach (var cell in row)
                {
                    if (rule.Content == null)
                    {
                        // Nothing to compare against.
                        break;
                    }

                    // Spaces inside the cell are ignored for matching.
                    var cellText = cell.Replace(" ", "");
                    foreach (var content in rule.Content)
                    {
                        if (rule.IsContentEq)
                        {
                            // Equality mode.
                            IsMatch = content.Equals(cellText);
                        }
                        else
                        {
                            // Containment mode. An empty cell is skipped: every string
                            // contains "", so it would otherwise match any rule text.
                            // NOTE(review): this tests whether the rule text contains the
                            // cell text, not the reverse; confirm the direction is intended.
                            IsMatch = cellText.Length > 0 && content.Contains(cellText);
                        }
                        if (IsMatch)
                        {
                            break;
                        }
                    }
                    if (IsMatch)
                    {
                        break;
                    }
                }

                if (IsMatch)
                {
                    Container.Add(ConvertRowToCellInfo(row, tableNo, RowNo, RowHeader));
                }
            }
        }
        return(Container);
    }
コード例 #8
0
ファイル: Contract.cs プロジェクト: ReadyThalia/FDDC
    /// <summary>
    /// Extracts the contract amount from the text that follows the amount keywords.
    /// </summary>
    /// <param name="node">Parsed document handed to the extractor.</param>
    /// <returns>
    /// The value <c>Utility.SeekMoney</c> produced for the last candidate,
    /// or an empty string when there are no candidates.
    /// </returns>
    static string GetMoney(HTMLEngine.MyRootHtmlNode node)
    {
        var extractor = new ExtractProperty();
        // Candidates are the texts following these keywords.
        extractor.LeadingWordList = new string[] { "中标金额", "中标价", "合同金额", "合同总价", "订单总金额" };
        extractor.Extract(node);

        // Every candidate is parsed and logged; each one overwrites the previous
        // result, so the last candidate decides the return value.
        var latest = "";
        foreach (var candidate in extractor.CandidateWord)
        {
            latest = Utility.SeekMoney(candidate, "");
            Program.Logger.WriteLine("金额候补词:[" + latest + "]");
        }
        return(latest);
    }
コード例 #9
0
ファイル: StockChange.cs プロジェクト: kimmow/FDDC
    /// <summary>
    /// Extracts the end date of the shareholding change.
    /// </summary>
    /// <param name="root">Parsed document handed to the extractor.</param>
    /// <returns>
    /// The first candidate found between "截止"/"截至" and "日", with "日" appended
    /// again and passed through <c>Normalizer.NormailizeDate</c>; an empty string
    /// when there is no candidate.
    /// </returns>
    static string GetChangeEndDate(HTMLEngine.MyRootHtmlNode root)
    {
        var extractor = new EntityProperty();

        // Candidates start after one of the first markers and end before "日".
        extractor.StartEndFeature = Utility.GetStartEndStringArray(
            new string[] { "截止", "截至" },
            new string[] { "日" });
        extractor.Extract(root);

        if (extractor.CandidateWord.Count == 0)
        {
            return("");
        }

        // Only the first candidate is used.
        var first = extractor.CandidateWord[0];
        Program.Logger.WriteLine("候补变动截止日期:[" + first + "]");
        return(Normalizer.NormailizeDate(first + "日"));
    }
コード例 #10
0
ファイル: IncreaseStock.cs プロジェクト: PeiYangLiu/FDDC
    /// <summary>
    /// Determines the purchase method by mapping the keyword "现金认购" to "现金".
    /// </summary>
    /// <param name="root">Not referenced by the body; extraction runs against this instance.</param>
    /// <returns>The extractor's <c>WordMapResult</c>, returned as is.</returns>
    string getBuyMethod(HTMLEngine.MyRootHtmlNode root)
    {
        var property = new EntityProperty();

        // A hit on "现金认购" is reported as "现金".
        property.KeyWordMap.Add("现金认购", "现金");
        property.Extract(this);

        var method = property.WordMapResult;
        // Log only a non-empty result, and only outside multi-thread mode.
        if (!String.IsNullOrEmpty(method) && !Program.IsMultiThreadMode)
        {
            Program.Logger.WriteLine("认购方式:" + method);
        }
        return(method);
    }
コード例 #11
0
ファイル: Contract.cs プロジェクト: kimmow/FDDC
    /// <summary>
    /// Picks the name of the contract's Party B ("乙方") from the document.
    /// </summary>
    /// <remarks>
    /// Sources are tried in order and the first hit wins:
    /// 1. the first candidate following "供应商名称:" or "乙方:", trimmed;
    /// 2. if any candidate precedes "有限公司董事会": the first sub-company in
    ///    <c>companynamelist</c>, otherwise that candidate (taken from the reversed
    ///    list), trimmed and suffixed with "有限公司";
    /// 3. the last entry of <c>companynamelist</c>.
    /// </remarks>
    /// <param name="root">Parsed document handed to the extractor.</param>
    /// <returns>The selected name, or an empty string when no source yields one.</returns>
    static string GetYiFang(HTMLEngine.MyRootHtmlNode root)
    {
        // Pass 1: text following an explicit label.
        // Other labels seen in documents but not enabled here:
        // "中标单位:","中标人:","中标单位:","中标人:","乙方(供方):","承包人:","承包方:","中标方:","供应商名称:","中标人名称:"
        var byLabel = new EntityProperty();
        byLabel.LeadingWordList = new string[] { "供应商名称:", "乙方:" };
        byLabel.Extract(root);
        if (byLabel.CandidateWord.Count > 0)
        {
            var labelled = byLabel.CandidateWord[0];
            Program.Logger.WriteLine("乙方候补词(关键字):[" + labelled + "]");
            return(labelled.Trim());
        }

        // Pass 2: text preceding "有限公司董事会".
        var byBoard = new EntityProperty();
        byBoard.TrailingWordList = new string[] { "有限公司董事会" };
        byBoard.Extract(root);
        // Reversed so that the candidate found last is tried first.
        byBoard.CandidateWord.Reverse();
        if (byBoard.CandidateWord.Count > 0)
        {
            // A known sub-company takes priority over the extracted text.
            foreach (var company in companynamelist)
            {
                if (company.isSubCompany)
                {
                    return(company.secFullName);
                }
            }

            var boardName = byBoard.CandidateWord[0];
            Program.Logger.WriteLine("乙方候补词(关键字):[" + boardName + "有限公司]");
            return(boardName.Trim() + "有限公司");
        }

        // Pass 3: fall back to the last company name collected elsewhere.
        return(companynamelist.Count > 0
               ? companynamelist[companynamelist.Count - 1].secFullName
               : "");
    }
コード例 #12
0
ファイル: StockChange.cs プロジェクト: kimmow/FDDC
    /// <summary>
    /// Extracts the shareholder's full name from the document text.
    /// </summary>
    /// <param name="root">Parsed document handed to the extractor.</param>
    /// <returns>
    /// The first candidate containing "简称"; failing that, the first candidate;
    /// an empty string when there are no candidates.
    /// </returns>
    static string GetHolderFullName(HTMLEngine.MyRootHtmlNode root)
    {
        var extractor = new EntityProperty();

        // Candidates lie between one of the start markers and one of the end markers.
        extractor.StartEndFeature = Utility.GetStartEndStringArray(
            new string[] { "接到", "收到", "股东" },
            new string[] { "的", "通知", "告知函", "减持", "增持", "《" });
        extractor.Extract(root);

        var candidates = extractor.CandidateWord;

        // A candidate that mentions "简称" is preferred over the plain first hit.
        foreach (var candidate in candidates)
        {
            if (!candidate.Contains("简称"))
            {
                continue;
            }
            Program.Logger.WriteLine("候补股东全称修正:[" + candidate + "]");
            return(candidate);
        }

        return(candidates.Count > 0 ? candidates[0] : "");
    }
コード例 #13
0
ファイル: Contract.cs プロジェクト: kimmow/FDDC
    /// <summary>
    /// Lists the companies positioned at the places where "联合体" occurs,
    /// leaving out Party B itself.
    /// </summary>
    /// <param name="root">Parsed document searched for the keyword.</param>
    /// <param name="YiFang">Name of Party B; a company with this exact full name is excluded.</param>
    /// <returns>The distinct full names joined with "、"; an empty string when none qualify.</returns>
    static string GetUnionMember(HTMLEngine.MyRootHtmlNode root, String YiFang)
    {
        var members = new List <String>();

        foreach (var hitId in EntityProperty.FindWordCnt("联合体", root))
        {
            foreach (var company in companynamelist)
            {
                // Only companies whose position id equals the keyword hit's id count.
                if (company.positionId != hitId)
                {
                    continue;
                }

                var fullName = company.secFullName;
                // Skip names already collected and Party B itself.
                if (members.Contains(fullName) || fullName.Equals(YiFang))
                {
                    continue;
                }
                members.Add(fullName);
            }
        }
        return(String.Join("、", members));
    }
コード例 #14
0
ファイル: HTMLTable.cs プロジェクト: ReadyThalia/FDDC
    /// <summary>
    /// Scans every table of the document and, for each data row, collects the cells
    /// found under the header columns selected by <paramref name="Rules"/>.
    /// </summary>
    /// <param name="root">Parsed document; its TableList is read with table numbers 1..Count.</param>
    /// <param name="Rules">
    /// Column rules. <c>Rules[0]</c> is the primary field: a table with no header
    /// matching it is skipped, and so is a row whose primary cell is empty, a span/null
    /// placeholder, or equal to one of the primary rule's own titles.
    /// </param>
    /// <param name="IsMeger">When true, the collected rows are passed through <c>MergerMultiInfo</c> before returning.</param>
    /// <returns>
    /// One <c>CellInfo</c> array per accepted row, indexed like <paramref name="Rules"/>;
    /// entries for rules without a matching column are left at their default value.
    /// Rows for which <c>IsSameContent</c> reports an already collected row are dropped.
    /// </returns>
    public static List<CellInfo[]> GetMultiInfo(HTMLEngine.MyRootHtmlNode root, List<TableSearchRule> Rules, bool IsMeger)
    {
        var collected = new List<CellInfo[]>();
        for (int tableNo = 1; tableNo <= root.TableList.Count; tableNo++)
        {
            var table = new HTMLTable(root.TableList[tableNo]);
            var header = table.GetHeaderRow();

            // columnOf[i] is the 1-based header column matched by Rules[i]; 0 means "not found".
            var columnOf = new int[Rules.Count];
            for (int ruleIdx = 0; ruleIdx < Rules.Count; ruleIdx++)
            {
                // Look at the header cells left to right and keep the first match.
                for (int col = 0; col < header.Length; col++)
                {
                    var rule = Rules[ruleIdx];
                    bool hit = false;
                    if (rule.IsEq)
                    {
                        // Equality mode: the header text must be one of the rule's titles.
                        hit = rule.Rule.Contains(header[col]);
                    }
                    else
                    {
                        // Containment mode: the header text must contain one of the titles.
                        foreach (var title in rule.Rule)
                        {
                            if (header[col].Contains(title))
                            {
                                hit = true;
                                break;
                            }
                        }
                    }

                    if (hit)
                    {
                        columnOf[ruleIdx] = col + 1;
                        break;
                    }
                }
                // Without the primary column there is no point in checking the other rules.
                if (columnOf[0] == 0) break;
            }

            // Primary column missing: move on to the next table.
            if (columnOf[0] == 0) continue;

            // Row 1 is not read as data; data rows run from 2 up to RowCount.
            for (int rowNo = 2; rowNo <= table.RowCount; rowNo++)
            {
                // Total rows are skipped.
                if (table.IsTotalRow(rowNo)) continue;

                // The primary cell must carry real content.
                var primary = table.CellValue(rowNo, columnOf[0]);
                bool isPlaceholder = primary == "" || primary == "<rowspan>" || primary == "<colspan>" || primary == "<null>";
                // A primary cell equal to one of the primary titles is skipped as well.
                if (isPlaceholder || Rules[0].Rule.Contains(primary)) continue;

                var rowData = new CellInfo[Rules.Count];
                for (int ruleIdx = 0; ruleIdx < Rules.Count; ruleIdx++)
                {
                    int colNo = columnOf[ruleIdx];
                    if (colNo == 0) continue;

                    // The position is recorded even when the cell turns out to be "<null>".
                    rowData[ruleIdx].TableId = tableNo;
                    rowData[ruleIdx].Row = rowNo;
                    rowData[ruleIdx].Column = colNo;

                    var raw = table.CellValue(rowNo, colNo);
                    if (raw.Equals("<null>")) continue;

                    // The normalizer receives the raw text and the column's header text.
                    var normalize = Rules[ruleIdx].Normalize;
                    if (normalize != null)
                    {
                        raw = normalize(raw, header[colNo - 1]);
                    }
                    rowData[ruleIdx].RawData = raw;
                }

                // Keep the row only if no collected row has the same content.
                if (!collected.Exists(existing => IsSameContent(existing, rowData)))
                {
                    collected.Add(rowData);
                }
            }
        }

        if (IsMeger)
        {
            collected = MergerMultiInfo(collected);
        }
        return collected;
    }
コード例 #15
0
ファイル: StockChange.cs プロジェクト: kimmow/FDDC
    /// <summary>
    /// Builds shareholding-change records from the document's tables.
    /// </summary>
    /// <remarks>
    /// Table rows are walked last-to-first. Rows lacking a holder name or an end
    /// date are dropped. Each remaining record takes the "holding after change"
    /// figures from the first unused <c>GetHolderAfter</c> entry whose name equals
    /// the holder's full or short name. Finally, among records with the same key,
    /// the one without an after-change number is discarded when another one has it.
    /// </remarks>
    /// <param name="root">Parsed document whose tables are searched.</param>
    /// <param name="id">Announcement id stored in every record.</param>
    /// <returns>The records that survive filtering and merging; empty when no row matches.</returns>
    static List <struStockChange> ExtractFromTable(HTMLEngine.MyRootHtmlNode root, string id)
    {
        // Holder name column: the header must equal the title.
        var StockHolderRule = new TableSearchRule();

        StockHolderRule.Name = "股东全称";
        StockHolderRule.Rule = new string[] { "股东名称" }.ToList();
        StockHolderRule.IsEq = true;

        // Change date column: containment match, normalized as a date.
        var ChangeDateRule = new TableSearchRule();

        ChangeDateRule.Name      = "变动截止日期";
        ChangeDateRule.Rule      = new string[] { "减持期间", "增持期间", "减持时间", "增持时间" }.ToList();
        ChangeDateRule.IsEq      = false;
        ChangeDateRule.Normalize = Normalizer.NormailizeDate;

        // Price column: containment match; keeps only the text before "元".
        var ChangePriceRule = new TableSearchRule();

        ChangePriceRule.Name      = "变动价格";
        ChangePriceRule.Rule      = new string[] { "减持均价", "增持均价" }.ToList();
        ChangePriceRule.IsEq      = false;
        ChangePriceRule.Normalize = (x, y) =>
        {
            if (x.Contains("元"))
            {
                return(Utility.GetStringBefore(x, "元"));
            }
            return(x);
        };

        // Share count column: containment match.
        var ChangeNumberRule = new TableSearchRule();

        ChangeNumberRule.Name      = "变动数量";
        ChangeNumberRule.Rule      = new string[] { "减持股数", "增持股数" }.ToList();
        ChangeNumberRule.IsEq      = false;
        ChangeNumberRule.Normalize = Normalizer.NormalizerStockNumber;

        // The order here fixes the cell index used when reading each row below.
        var Rules = new List <TableSearchRule>();

        Rules.Add(StockHolderRule);
        Rules.Add(ChangeDateRule);
        Rules.Add(ChangePriceRule);
        Rules.Add(ChangeNumberRule);

        var result = HTMLTable.GetMultiInfo(root, Rules, false);
        if (result.Count == 0)
        {
            // Nothing to process, so the after-change lookup is not needed either.
            return(new List <struStockChange>());
        }

        // The after-change figures belong on a holder's last record only, so the
        // rows are processed in reverse and each after-entry is consumed once.
        result.Reverse();

        // Fetched once for all records: the list used to be rebuilt for every
        // record, which discarded the Used flags set on earlier records.
        var holderafterlist = GetHolderAfter(root);
        var stockchangelist = new List <struStockChange>();

        foreach (var rec in result)
        {
            var stockchange = new struStockChange();
            stockchange.id = id;
            var Name = NormalizeCompanyName(rec[0].RawData);
            stockchange.HolderFullName  = Name.Item1;
            stockchange.HolderShortName = Name.Item2;
            stockchange.ChangeEndDate   = rec[1].RawData;
            stockchange.ChangePrice     = rec[2].RawData;
            stockchange.ChangeNumber    = rec[3].RawData;

            // Nearly all valid records carry both a holder name and an end date;
            // dropping the rest may hurt a few records without an end date but
            // helps the overall metrics. This check runs before the lookup so
            // that a discarded record cannot use up an after-change entry.
            if (string.IsNullOrEmpty(stockchange.HolderFullName) || string.IsNullOrEmpty(stockchange.ChangeEndDate))
            {
                continue;
            }

            for (int i = 0; i < holderafterlist.Count; i++)
            {
                var after = holderafterlist[i];
                if (after.Used)
                {
                    continue;
                }
                if (after.Name == stockchange.HolderFullName || after.Name == stockchange.HolderShortName)
                {
                    stockchange.HoldNumberAfterChange  = after.Count;
                    stockchange.HoldPercentAfterChange = after.Percent;
                    after.Used = true;
                    // Written back so the flag persists even if the element type
                    // is a value type (a no-op for reference types).
                    holderafterlist[i] = after;
                    break;
                }
            }
            stockchangelist.Add(stockchange);
        }

        // Merge: for two records with the same key, blank the id of the one that
        // lacks an after-change number when the other one has it.
        // NOTE(review): x and y are locals; if struStockChange is a value type
        // these assignments do not reach the list. Confirm against its declaration.
        for (int i = 0; i < stockchangelist.Count; i++)
        {
            var x = stockchangelist[i];
            for (int j = i + 1; j < stockchangelist.Count; j++)
            {
                var y = stockchangelist[j];
                if (x.GetKey() == y.GetKey())
                {
                    if (string.IsNullOrEmpty(x.HoldNumberAfterChange) &&
                        !string.IsNullOrEmpty(y.HoldNumberAfterChange))
                    {
                        x.id = "";
                    }
                    if (!string.IsNullOrEmpty(x.HoldNumberAfterChange) &&
                        string.IsNullOrEmpty(y.HoldNumberAfterChange))
                    {
                        y.id = "";
                    }
                }
            }
        }

        // Records whose id was blanked above are left out.
        return(stockchangelist.Where((x) => { return !String.IsNullOrEmpty(x.id); }).ToList());
    }
コード例 #16
0
ファイル: IncreaseStock.cs プロジェクト: toby2o12/FDDC
    /// <summary>
    /// Reads the subscriber rows from the document's tables and turns each one
    /// into an <c>IncreaseStockRec</c>.
    /// </summary>
    /// <param name="root">Parsed document whose tables are searched.</param>
    /// <param name="SampleincreaseStock">Template record; its <c>Id</c> and <c>BuyMethod</c> are copied into every result.</param>
    /// <returns>
    /// One record per table row with a non-empty subscriber cell, excluding rows
    /// whose share count or amount is exactly "0".
    /// </returns>
    List <RecordBase> GetMultiTarget(HTMLEngine.MyRootHtmlNode root, IncreaseStockRec SampleincreaseStock)
    {
        // Subscriber column: required, the header must equal one of the titles.
        // Not matched here: "投资者名称", "股东名称".
        var targetRule = new TableSearchTitleRule
        {
            Name      = "认购对象",
            Title     = new string[] { "发行对象", "认购对象", "发行对象名称" }.ToList(),
            IsTitleEq = true,
            IsRequire = true
        };

        // Share count column: containment match.
        var numberRule = new TableSearchTitleRule
        {
            Name      = "增发数量",
            Title     = new string[] { "配售股数", "认购数量", "认购股数", "认购股份数", "发行股份数", "配售数量" }.ToList(),
            IsTitleEq = false,
            Normalize = NumberUtility.NormalizerStockNumber
        };

        // Amount column: containment match.
        var moneyRule = new TableSearchTitleRule
        {
            Name      = "增发金额",
            Title     = new string[] { "配售金额", "认购金额", "获配金额" }.ToList(),
            IsTitleEq = false,
            Normalize = MoneyUtility.Format
        };

        // Lock-up period column: containment match.
        var freezeRule = new TableSearchTitleRule
        {
            Name      = "锁定期",
            Title     = new string[] { "锁定期", "限售期" }.ToList(),
            IsTitleEq = false,
            Normalize = NormalizerFreezeYear
        };

        // Price column: containment match.
        var priceRule = new TableSearchTitleRule
        {
            Name      = "价格",
            Title     = new string[] { "认购价格", "配售价格", "申购报价" }.ToList(),
            IsTitleEq = false,
            Normalize = MoneyUtility.Format
        };

        // The order here fixes the cell index used when reading each row below.
        var titleRules = new List <TableSearchTitleRule> { targetRule, numberRule, moneyRule, freezeRule, priceRule };

        var increaseStocklist = new List <RecordBase>();
        foreach (var item in HTMLTable.GetMultiInfoByTitleRules(root, titleRules, true))
        {
            var increase = new IncreaseStockRec();
            increase.Id            = SampleincreaseStock.Id;
            increase.BuyMethod     = SampleincreaseStock.BuyMethod;
            increase.PublishTarget = item[0].RawData;
            if (String.IsNullOrEmpty(increase.PublishTarget))
            {
                // A row without a subscriber name is not a record.
                continue;
            }
            increase.PublishTarget = increase.PublishTarget.NormalizeTextResult();

            // Share count: a literal "0" rejects the row; of "a|b" only the last part is kept.
            increase.IncreaseNumber = item[1].RawData;
            if (increase.IncreaseNumber == "0")
            {
                continue;
            }
            if (!String.IsNullOrEmpty(increase.IncreaseNumber) && increase.IncreaseNumber.Contains("|"))
            {
                increase.IncreaseNumber = increase.IncreaseNumber.Split("|").Last();
            }

            // Amount: same treatment as the share count.
            increase.IncreaseMoney = item[2].RawData;
            if (increase.IncreaseMoney == "0")
            {
                continue;
            }
            if (!String.IsNullOrEmpty(increase.IncreaseMoney) && increase.IncreaseMoney.Contains("|"))
            {
                increase.IncreaseMoney = increase.IncreaseMoney.Split("|").Last();
            }

            // When the amount is missing but count and price both parse as numbers,
            // price * count is computed. The product is only logged, not stored.
            double price;
            double number;
            if (String.IsNullOrEmpty(increase.IncreaseMoney) &&
                !String.IsNullOrEmpty(increase.IncreaseNumber) &&
                !String.IsNullOrEmpty(item[4].RawData) &&
                double.TryParse(item[4].RawData, out price) &&
                double.TryParse(increase.IncreaseNumber, out number))
            {
                double money = price * number;
                if (!Program.IsMultiThreadMode)
                {
                    Program.Logger.WriteLine("通过计算获得金额:" + money.ToString());
                }
            }

            increase.FreezeYear = item[3].RawData;
            increaseStocklist.Add(increase);
        }
        return(increaseStocklist);
    }
コード例 #17
0
    /// <summary>
    /// Builds shareholding-change records from the document's tables.
    /// </summary>
    /// <remarks>
    /// Table rows are walked last-to-first. Each record takes the "holding after
    /// change" figures from the first unused <c>GetHolderAfter</c> entry whose name
    /// equals the holder's full or short name.
    /// </remarks>
    /// <param name="root">Parsed document whose tables are searched.</param>
    /// <param name="id">Announcement id stored in every record.</param>
    /// <returns>One record per row returned by the table search, in reversed row order.</returns>
    static List <struStockChange> ExtractFromTable(HTMLEngine.MyRootHtmlNode root, string id)
    {
        // Holder name column: the header must equal the title.
        var StockHolderRule = new TableSearchRule();

        StockHolderRule.Name = "股东全称";
        StockHolderRule.Rule = new string[] { "股东名称" }.ToList();
        StockHolderRule.IsEq = true;

        // Change date column: containment match, normalized as a date.
        var ChangeDateRule = new TableSearchRule();

        ChangeDateRule.Name      = "变动截止日期";
        ChangeDateRule.Rule      = new string[] { "减持期间", "增持期间", "减持时间", "增持时间" }.ToList();
        ChangeDateRule.IsEq      = false;
        ChangeDateRule.Normalize = Normalizer.NormailizeDate;

        // Price column: containment match; keeps only the text before "元".
        var ChangePriceRule = new TableSearchRule();

        ChangePriceRule.Name      = "变动价格";
        ChangePriceRule.Rule      = new string[] { "减持均价", "增持均价" }.ToList();
        ChangePriceRule.IsEq      = false;
        ChangePriceRule.Normalize = (x, y) =>
        {
            if (x.Contains("元"))
            {
                return(Utility.GetStringBefore(x, "元"));
            }
            return(x);
        };

        // Share count column: containment match.
        var ChangeNumberRule = new TableSearchRule();

        ChangeNumberRule.Name      = "变动数量";
        ChangeNumberRule.Rule      = new string[] { "减持股数", "增持股数" }.ToList();
        ChangeNumberRule.IsEq      = false;
        ChangeNumberRule.Normalize = Normalizer.NormalizerStockNumber;

        // The order here fixes the cell index used when reading each row below.
        var Rules = new List <TableSearchRule>();

        Rules.Add(StockHolderRule);
        Rules.Add(ChangeDateRule);
        Rules.Add(ChangePriceRule);
        Rules.Add(ChangeNumberRule);

        var result = HTMLTable.GetMultiInfo(root, Rules, false);

        var stockchangelist = new List <struStockChange>();
        if (result.Count == 0)
        {
            // Nothing to process, so the after-change lookup is not needed either.
            return(stockchangelist);
        }

        // The after-change figures belong on a holder's last record only, so the
        // rows are processed in reverse and each after-entry is consumed once.
        result.Reverse();

        // Fetched once for all records: the list used to be rebuilt for every
        // record, which discarded the Used flags set on earlier records.
        var holderafterlist = GetHolderAfter(root);

        foreach (var rec in result)
        {
            var stockchange = new struStockChange();
            stockchange.id = id;
            var Name = NormalizeCompanyName(rec[0].RawData);
            stockchange.HolderFullName  = Name.Item1;
            stockchange.HolderShortName = Name.Item2;
            stockchange.ChangeEndDate   = rec[1].RawData;
            stockchange.ChangePrice     = rec[2].RawData;
            stockchange.ChangeNumber    = rec[3].RawData;

            for (int i = 0; i < holderafterlist.Count; i++)
            {
                var after = holderafterlist[i];
                if (after.Used)
                {
                    continue;
                }
                if (after.Name == stockchange.HolderFullName || after.Name == stockchange.HolderShortName)
                {
                    stockchange.HoldNumberAfterChange  = after.Count;
                    stockchange.HoldPercentAfterChange = after.Percent;
                    after.Used = true;
                    // Written back so the flag persists even if the element type
                    // is a value type (a no-op for reference types).
                    holderafterlist[i] = after;
                    break;
                }
            }
            stockchangelist.Add(stockchange);
        }
        return(stockchangelist);
    }
コード例 #18
0
    List <struStockChange> ExtractFromTable(HTMLEngine.MyRootHtmlNode root, string id)
    {
        var StockHolderRule = new TableSearchRule();

        StockHolderRule.Name      = "股东全称";
        StockHolderRule.Title     = new string[] { "股东名称", "名称", "增持主体", "增持人", "减持主体", "减持人" }.ToList();
        StockHolderRule.IsTitleEq = true;
        StockHolderRule.IsRequire = true;

        var ChangeDateRule = new TableSearchRule();

        ChangeDateRule.Name  = "变动截止日期";
        ChangeDateRule.Title = new string[] { "买卖时间", "日期", "减持期间", "增持期间", "减持股份期间", "增持股份期间",
                                              "减持时间", "增持时间", "减持股份时间", "增持股份时间" }.ToList();
        ChangeDateRule.IsTitleEq = false;
        ChangeDateRule.Normalize = NormailizeEndChangeDate;


        var ChangePriceRule = new TableSearchRule();

        ChangePriceRule.Name      = "变动价格";
        ChangePriceRule.Title     = new string[] { "成交均价", "减持价格", "增持价格", "减持均", "增持均" }.ToList();
        ChangePriceRule.IsTitleEq = false;
        ChangePriceRule.Normalize = (x, y) =>
        {
            if (x.Contains("元"))
            {
                return(Utility.GetStringBefore(x, "元"));
            }
            return(x);
        };

        var ChangeNumberRule = new TableSearchRule();

        ChangeNumberRule.Name      = "变动数量";
        ChangeNumberRule.Title     = new string[] { "成交数量", "减持股数", "增持股数", "减持数量", "增持数量" }.ToList();
        ChangeNumberRule.IsTitleEq = false;
        ChangeNumberRule.Normalize = NumberUtility.NormalizerStockNumber;


        var Rules = new List <TableSearchRule>();

        Rules.Add(StockHolderRule);
        Rules.Add(ChangeDateRule);
        Rules.Add(ChangePriceRule);
        Rules.Add(ChangeNumberRule);

        var result = HTMLTable.GetMultiInfo(root, Rules, false);

        if (result.Count == 0)
        {
            //没有抽取到任何数据
            Rules.Clear();
            ChangeDateRule.IsRequire = true;
            Rules.Add(ChangeDateRule);
            Rules.Add(ChangePriceRule);
            Rules.Add(ChangeNumberRule);
            result = HTMLTable.GetMultiInfo(root, Rules, false);
            if (result.Count == 0)
            {
                return(new List <struStockChange>());
            }
            var NewResult = new List <CellInfo[]>();
            var Name      = GetHolderName(this.root);
            if (String.IsNullOrEmpty(Name.FullName) && String.IsNullOrEmpty(Name.ShortName))
            {
                return(new List <struStockChange>());
            }
            foreach (var item in result)
            {
                NewResult.Add(new CellInfo[]
                              { new CellInfo()
                                {
                                    RawData = String.IsNullOrEmpty(Name.FullName)?Name.ShortName:Name.FullName
                                }, item[0], item[1], item[2] });
            }
            result = NewResult;
        }

        var holderafterlist = GetHolderAfter();

        var stockchangelist = new List <struStockChange>();

        foreach (var rec in result)
        {
            var stockchange = new struStockChange();
            stockchange.id = id;

            var ModifyName = rec[0].RawData;
            //表格里面长的名字可能被分页切割掉
            //这里使用合计表进行验证
            if (!holderafterlist.Select((z) => { return(z.Name); }).ToList().Contains(ModifyName))
            {
                foreach (var item in holderafterlist)
                {
                    if (item.Name.EndsWith("先生"))
                    {
                        break;                            //特殊处理,没有逻辑可言
                    }
                    if (item.Name.StartsWith(ModifyName) && !item.Name.Equals(ModifyName))
                    {
                        ModifyName = item.Name;
                        break;
                    }
                    if (item.Name.EndsWith(ModifyName) && !item.Name.Equals(ModifyName))
                    {
                        ModifyName = item.Name;
                        break;
                    }
                }
            }


            var Name = CompanyNameLogic.NormalizeCompanyName(this, ModifyName);
            stockchange.HolderFullName  = Name.FullName.NormalizeTextResult();
            stockchange.HolderShortName = Name.ShortName;

            if (stockchange.HolderFullName.Contains("简称"))
            {
                stockchange.HolderShortName = Utility.GetStringAfter(stockchange.HolderFullName, "简称");
                stockchange.HolderShortName = stockchange.HolderShortName.Replace(")", String.Empty).Replace("“", String.Empty).Replace("”", String.Empty);
                stockchange.HolderFullName  = Utility.GetStringBefore(stockchange.HolderFullName, "(");
            }

            stockchange.ChangeEndDate = rec[1].RawData;

            DateTime x;
            if (!DateTime.TryParse(stockchange.ChangeEndDate, out x))
            {
                //无法处理的情况
                if (!Program.IsDebugMode)
                {
                    //非调试模式
                    stockchange.ChangeEndDate = String.Empty;
                }
            }

            if (!String.IsNullOrEmpty(rec[2].RawData))
            {
                //股价区间化的去除
                if (!(rec[2].RawData.Contains("-") || rec[2].RawData.Contains("~") || rec[2].RawData.Contains("至")))
                {
                    stockchange.ChangePrice = rec[2].RawData.Replace(" ", String.Empty);
                    stockchange.ChangePrice = stockchange.ChangePrice.NormalizeNumberResult();
                }
            }
            if (!RegularTool.IsUnsign(stockchange.ChangePrice))
            {
                if (!String.IsNullOrEmpty(stockchange.ChangePrice))
                {
                    Console.WriteLine("Error ChangePrice:[" + stockchange.ChangePrice + "]");
                }
                stockchange.ChangePrice = String.Empty;
            }


            if (!String.IsNullOrEmpty(rec[3].RawData))
            {
                stockchange.ChangeNumber = rec[3].RawData.Replace(" ", String.Empty);
                stockchange.ChangeNumber = stockchange.ChangeNumber.NormalizeNumberResult();
                if (!RegularTool.IsUnsign(stockchange.ChangeNumber))
                {
                    if (!String.IsNullOrEmpty(stockchange.ChangeNumber))
                    {
                        Console.WriteLine("Error ChangeNumber:[" + stockchange.ChangeNumber + "]");
                    }
                    stockchange.ChangeNumber = String.Empty;
                }
            }

            //基本上所有的有效记录都有股东名和截至日期,所以,这里这么做,可能对于极少数没有截至日期的数据有伤害,但是对于整体指标来说是好的
            if (string.IsNullOrEmpty(stockchange.HolderFullName) || string.IsNullOrEmpty(stockchange.ChangeEndDate))
            {
                continue;
            }
            if (stockchange.ChangeNumber == "0" || stockchange.ChangePrice == "0")
            {
                continue;
            }
            stockchangelist.Add(stockchange);
        }


        //寻找所有的股东全称
        var namelist = stockchangelist.Select(x => x.HolderFullName).Distinct().ToList();
        var newRec   = new List <struStockChange>();

        foreach (var name in namelist)
        {
            var stocklist = stockchangelist.Where((x) => { return(x.HolderFullName == name); }).ToList();
            stocklist.Sort((x, y) => { return(x.ChangeEndDate.CompareTo(y.ChangeEndDate)); });
            var last = stocklist.Last();
            for (int i = 0; i < holderafterlist.Count; i++)
            {
                var after = holderafterlist[i];
                after.Name = after.Name.Replace(" ", "");
                if (after.Name == last.HolderFullName || after.Name == last.HolderShortName)
                {
                    stockchangelist.Remove(last);   //结构体,无法直接修改!!使用删除,增加的方法
                    last.HoldNumberAfterChange  = after.Count;
                    last.HoldPercentAfterChange = after.Percent;
                    newRec.Add(last);
                }
            }
        }

        if (holderafterlist.Count != namelist.Count)
        {
            if (!Program.IsMultiThreadMode)
            {
                Program.Logger.WriteLine("增持者数量确认!");
            }
        }

        stockchangelist.AddRange(newRec);
        return(stockchangelist);
    }
コード例 #19
0
ファイル: HTMLTableSearch.cs プロジェクト: lxxwin/FDDC
    /// <summary>
    /// Scans every table of <paramref name="root"/> for a header row that satisfies the title rules
    /// and returns one <c>CellInfo[]</c> per data row found below that header.
    /// </summary>
    /// <remarks>
    /// Titles inside a rule are tried in list order, so earlier titles have priority.
    /// <c>Rules[0]</c> acts as the main field: if it cannot be located in a candidate header row the
    /// remaining rules are not evaluated for that row, and data rows whose main-field cell is empty
    /// or a span/null placeholder are skipped. Rows whose content already exists in the result
    /// (according to <c>IsSameContent</c>) are not added a second time.
    /// </remarks>
    /// <param name="root">Document node whose <c>TableList</c> is searched.</param>
    /// <param name="Rules">Title rules; element <c>i</c> of every returned array belongs to <c>Rules[i]</c>.</param>
    /// <param name="IsMeger">When true the collected rows are passed through <c>MergerMultiInfo</c> before returning.</param>
    /// <returns>The collected rows; an empty list when nothing matched or no rules were supplied.</returns>
    public static List <CellInfo[]> GetMultiInfoByTitleRules(HTMLEngine.MyRootHtmlNode root, List <TableSearchTitleRule> Rules, bool IsMeger)
    {
        var Container = new List <CellInfo[]>();

        // Without rules there is no main field (Rules[0]) to look for; the code below indexes
        // checkResultColumnNo[0] and would throw on an empty rule list.
        if (Rules == null || Rules.Count == 0)
        {
            return(Container);
        }

        for (int tableIndex = 0; tableIndex < root.TableList.Count; tableIndex++)
        {
            // TableList is addressed with tableIndex + 1 and the same value is stored as TableId
            // (presumably table ids are 1-based - confirm against HTMLEngine).
            var      table = new HTMLTable(root.TableList[tableIndex + 1]);
            var      checkResultColumnNo = new int[Rules.Count];
            var      checkResultTitle    = new string[Rules.Count];
            var      HeaderRowNo         = -1;
            String[] HeaderRow           = null;
            var      IsFirstRowOneCell   = false; // whether the first row is one cell merged across the whole row

            // Rows and columns are addressed 1-based through CellValue; the last row is never tested as a header.
            for (int TestRowHeader = 1; TestRowHeader < table.RowCount; TestRowHeader++)
            {
                // Both arrays describe the current candidate row only. The title array has to be reset
                // together with the column array: a title left over from a rejected row would otherwise
                // end the title search below after the first alternative, so lower-priority titles of a
                // rule would never be tried on this row.
                checkResultColumnNo = new int[Rules.Count];
                checkResultTitle    = new string[Rules.Count];

                // A row whose cells all carry the value of its first cell is treated as one merged cell.
                var IsOneColumnRow = true;
                for (int i = 2; i <= table.ColumnCount; i++)
                {
                    if (table.CellValue(TestRowHeader, i) != (table.CellValue(TestRowHeader, 1)))
                    {
                        IsOneColumnRow = false;
                        break;
                    }
                }
                if (IsOneColumnRow)
                {
                    if (TestRowHeader == 1)
                    {
                        IsFirstRowOneCell = true;
                    }
                    continue;
                }

                HeaderRow = table.GetRow(TestRowHeader);
                for (int checkItemIdx = 0; checkItemIdx < Rules.Count; checkItemIdx++)
                {
                    var rule = Rules[checkItemIdx];
                    foreach (var EvaluateTitle in rule.Title)
                    {
                        // Search by title priority: every title is looked up on its own across all header cells.
                        for (int ColIndex = 0; ColIndex < HeaderRow.Length; ColIndex++)
                        {
                            if (!IsTitleCellMatch(rule, EvaluateTitle, HeaderRow[ColIndex]))
                            {
                                continue;
                            }
                            if (!HasRequiredSuperTitle(table, rule, TestRowHeader, ColIndex + 1))
                            {
                                continue;
                            }
                            checkResultTitle[checkItemIdx]    = HeaderRow[ColIndex];
                            checkResultColumnNo[checkItemIdx] = ColIndex + 1;
                            break;
                        }
                        if (!String.IsNullOrEmpty(checkResultTitle[checkItemIdx]))
                        {
                            break;
                        }
                    }
                    // The main field was not found, so the other fields need not be searched.
                    if (checkResultColumnNo[0] == 0)
                    {
                        break;
                    }
                }

                bool IsAllRequiredItemOK = true;
                for (int checkItemIdx = 0; checkItemIdx < checkResultColumnNo.Length; checkItemIdx++)
                {
                    if (checkResultColumnNo[checkItemIdx] == 0 && Rules[checkItemIdx].IsRequire)
                    {
                        IsAllRequiredItemOK = false;
                        break;
                    }
                }
                if (!IsAllRequiredItemOK)
                {
                    continue;
                }

                if (TestRowHeader == 1 || IsFirstRowOneCell)
                {
                    HeaderRowNo = TestRowHeader;
                    break;
                }

                // The header is not on the first row and the first row is not one big merged cell:
                // apply the strict check, which currently demands that every rule found a column.
                var IsOK = true;
                for (int i = 0; i < Rules.Count; i++)
                {
                    if (checkResultColumnNo[i] == 0)
                    {
                        IsOK = false;
                        break;
                    }
                }
                if (IsOK)
                {
                    HeaderRowNo = TestRowHeader;
                    break;
                }
            }

            // No usable header in this table, move on to the next one.
            if (HeaderRowNo == -1)
            {
                continue;
            }

            // Data rows are all rows below the header row.
            for (int RowNo = HeaderRowNo + 1; RowNo <= table.RowCount; RowNo++)
            {
                // Rows the table reports as total rows are not data.
                if (table.IsTotalRow(RowNo))
                {
                    continue;
                }
                // The main field must carry a real value.
                var target = table.CellValue(RowNo, checkResultColumnNo[0]);
                if (target == String.Empty || target == strRowSpanValue || target == strColSpanValue || target == strNullValue)
                {
                    continue;
                }
                // A cell that is itself one of the main-field titles is a header, not data.
                if (Rules[0].Title.Contains(target))
                {
                    continue;
                }

                var RowData = new CellInfo[Rules.Count];
                for (int checkItemIdx = 0; checkItemIdx < Rules.Count; checkItemIdx++)
                {
                    // Rules without a matching column keep their default entry.
                    if (checkResultColumnNo[checkItemIdx] == 0)
                    {
                        continue;
                    }
                    var ColNo = checkResultColumnNo[checkItemIdx];
                    RowData[checkItemIdx].TableId = tableIndex + 1;
                    RowData[checkItemIdx].Row     = RowNo;
                    RowData[checkItemIdx].Column  = ColNo;
                    RowData[checkItemIdx].Title   = checkResultTitle[checkItemIdx];
                    // Position and title are recorded even for a null placeholder; only RawData stays unset.
                    if (table.CellValue(RowNo, ColNo).Equals(strNullValue))
                    {
                        continue;
                    }
                    RowData[checkItemIdx].RawData = table.CellValue(RowNo, ColNo);
                    if (Rules[checkItemIdx].Normalize != null)
                    {
                        RowData[checkItemIdx].RawData = Rules[checkItemIdx].Normalize(RowData[checkItemIdx].RawData, HeaderRow[ColNo - 1]);
                    }
                }

                var HasSame = false;
                foreach (var existRow in Container)
                {
                    if (IsSameContent(existRow, RowData))
                    {
                        HasSame = true;
                        break;
                    }
                }
                if (!HasSame)
                {
                    Container.Add(RowData);
                }
            }
        }
        if (IsMeger)
        {
            Container = MergerMultiInfo(Container);
        }
        return(Container);
    }

    /// <summary>
    /// Tests one header cell against one candidate title of a rule: the cell text with spaces removed
    /// must equal the title (<c>IsTitleEq</c>) or contain it, and the raw cell text must not contain
    /// any of the rule's exclude words.
    /// </summary>
    private static bool IsTitleCellMatch(TableSearchTitleRule rule, string EvaluateTitle, string headerCell)
    {
        var normalizedCell = headerCell.Replace(" ", "");
        var isMatch        = rule.IsTitleEq ? EvaluateTitle.Equals(normalizedCell) : normalizedCell.Contains(EvaluateTitle);
        if (!isMatch)
        {
            return(false);
        }
        if (rule.ExcludeTitle != null)
        {
            foreach (var word in rule.ExcludeTitle)
            {
                if (headerCell.Contains(word))
                {
                    return(false);
                }
            }
        }
        return(true);
    }

    /// <summary>
    /// Checks the super-title requirement of a rule for one column: returns true when the rule has no
    /// super titles, or when a row above <paramref name="headerRowNo"/> holds, in column
    /// <paramref name="columnNo"/>, a value (spaces removed) that equals (<c>IsSuperTitleEq</c>) or
    /// contains one of the rule's super titles.
    /// </summary>
    private static bool HasRequiredSuperTitle(HTMLTable table, TableSearchTitleRule rule, int headerRowNo, int columnNo)
    {
        if (rule.SuperTitle == null || rule.SuperTitle.Count == 0)
        {
            return(true);
        }
        for (int superRowNo = 1; superRowNo < headerRowNo; superRowNo++)
        {
            var value = table.CellValue(superRowNo, columnNo).Replace(" ", "");
            if (rule.IsSuperTitleEq)
            {
                // Equal mode
                if (rule.SuperTitle.Contains(value))
                {
                    return(true);
                }
            }
            else
            {
                // Contains mode
                foreach (var supertitle in rule.SuperTitle)
                {
                    if (value.Contains(supertitle))
                    {
                        return(true);
                    }
                }
            }
        }
        return(false);
    }