Esempio n. 1
0
    /// <summary>
    /// Extracts stock-change records from a single announcement HTML file.
    /// </summary>
    /// <remarks>
    /// The announcement ID is the file name with every ".html" occurrence removed.
    /// Records returned by <c>ExtractFromTable</c> are used as-is when there are any;
    /// otherwise one record is built from <c>GetHolderFullName</c> /
    /// <c>NormalizeCompanyName</c> and <c>GetChangeEndDate</c> and appended to that list.
    /// As a side effect, <c>companynamelist</c> is reassigned on every call.
    /// </remarks>
    /// <param name="htmlFileName">Path of the announcement HTML file to analyze.</param>
    /// <returns>The list of extracted stock-change records.</returns>
    public static List <struStockChange> Extract(string htmlFileName)
    {
        var fileInfo = new System.IO.FileInfo(htmlFileName);
        Program.Logger.WriteLine("Start FileName:[" + fileInfo.Name + "]");

        var root = HTMLEngine.Anlayze(htmlFileName);
        companynamelist = BussinessLogic.GetCompanyNameByCutWord(root);

        // Announcement ID, shared by the table path and the fallback path.
        var announcementId = fileInfo.Name.Replace(".html", "");

        List <struStockChange> records = ExtractFromTable(root, announcementId);
        if (records.Count == 0)
        {
            // The table extraction produced nothing: build a single record instead.
            var record = new struStockChange();
            record.id = announcementId;
            Program.Logger.WriteLine("公告ID:" + record.id);

            var holderNames = NormalizeCompanyName(GetHolderFullName(root));
            record.HolderFullName  = holderNames.Item1;
            record.HolderShortName = holderNames.Item2;
            record.ChangeEndDate   = GetChangeEndDate(root);
            records.Add(record);
        }

        return records;
    }
Esempio n. 2
0
    /// <summary>
    /// Manual test driver: runs the company short-name and full-name lookups over every
    /// file in the <c>html</c> folder of three training-set categories.
    /// </summary>
    /// <remarks>
    /// Before the loops, one fixed announcement (20526193.html) is analyzed and passed to
    /// <c>GetCompanyNameByCutWord</c>; its result is discarded.
    /// Return values of the per-file lookups are discarded as well.
    /// </remarks>
    public static void JianchengTest()
    {
        // Path prefix (relative to DocBase) shared by every training-set folder.
        const string TrainRoot = @"\FDDC_announcements_round1_train_20180518\round1_train_20180518";

        BussinessLogic.GetCompanyNameByCutWord(HTMLEngine.Anlayze(FDDC.Program.DocBase + TrainRoot + @"\增减持\html\20526193.html"));

        // Category folders, processed in this order.
        var trainCategories = new[] { "重大合同", "增减持", "定增" };

        foreach (var category in trainCategories)
        {
            var categoryPath = FDDC.Program.DocBase + TrainRoot + @"\" + category;

            // NOTE: the console messages say "Contract" for every category;
            // they are kept unchanged so the program output stays the same.
            Console.WriteLine("Start To Extract Info Contract TRAIN");
            foreach (var filename in System.IO.Directory.GetFiles(categoryPath + @"\html\"))
            {
                var root = HTMLEngine.Anlayze(filename);
                var fi   = new System.IO.FileInfo(filename);
                FDDC.Program.Logger.WriteLine("FileName:" + fi.Name);
                BussinessLogic.GetCompanyShortName(root);
                BussinessLogic.GetCompanyFullName(root);
            }
            Console.WriteLine("Complete Extract Info Contract");
        }
    }
Esempio n. 3
0
    /// <summary>
    /// Extracts contract records from a single announcement HTML file.
    /// </summary>
    /// <remarks>
    /// Announcement patterns noted by the original author:
    /// pattern 1 - only a single main contract;
    /// pattern 2 - only multiple sub-contracts;
    /// pattern 3 - one main contract plus multiple sub-contracts.
    /// This method only adds the single result of <c>ExtractSingle</c> (the main contract).
    /// As a side effect, <c>companynamelist</c> is reassigned on every call.
    /// </remarks>
    /// <param name="htmlFileName">Path of the announcement HTML file to analyze.</param>
    /// <returns>A list containing exactly one contract record.</returns>
    public static List <struContract> Extract(string htmlFileName)
    {
        var fileInfo = new System.IO.FileInfo(htmlFileName);
        Program.Logger.WriteLine("Start FileName:[" + fileInfo.Name + "]");

        var root = HTMLEngine.Anlayze(htmlFileName);
        companynamelist = BussinessLogic.GetCompanyNameByCutWord(root);

        // Announcement ID: the file name with ".html" removed.
        var announcementId = fileInfo.Name.Replace(".html", "");
        Program.Logger.WriteLine("公告ID:" + announcementId);

        // Extract the main contract.
        return new List <struContract> { ExtractSingle(root, announcementId) };
    }