예제 #1
0
파일: UT.cs 프로젝트: ReadyThalia/FDDC
    /// <summary>
    /// Runs <c>WordAnlayze.Anlayze</c> against four fixed training announcements.
    /// For each document id, the HTML file is parsed with <c>HTMLEngine.Anlayze</c>,
    /// the first record returned by <c>Traning.GetContractById</c> is fetched, and the
    /// parsed root is analyzed together with that record's <c>ProjectName</c>.
    /// </summary>
    public static void RunWordAnlayze()
    {
        // Folder (relative to Program.DocBase) that holds the training HTML files used here.
        const string htmlFolder = @"\FDDC_announcements_round1_train_20180518\round1_train_20180518\重大合同\html\";

        // Document ids to analyze, processed in this order.
        var documentIds = new[] { "1044779", "1450", "1042224", "917362" };

        foreach (var id in documentIds)
        {
            var parsedRoot = HTMLEngine.Anlayze(Program.DocBase + htmlFolder + id + ".html");
            var contract   = Traning.GetContractById(id)[0];

            WordAnlayze.Anlayze(parsedRoot, contract.ProjectName);
        }
    }
예제 #2
0
        /// <summary>
        /// Program entry point. Registers the code-page encoding provider, loads the
        /// company-name resource, then runs whichever extraction tasks are enabled by the
        /// local flags below. Each task walks the <c>html</c> folder of a data set, writes
        /// one CSV line per extracted item (gb2312 encoded) under <c>Result\</c>, and, for
        /// the TRAIN variants, initializes the training data and evaluates the results.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // Initialization.
            // The code-page provider is registered before any Encoding.GetEncoding("gb2312") call below.
            Encoding.RegisterProvider(CodePagesEncodingProvider.Instance);
            BussinessLogic.LoadCompanyName(@"Resources\FDDC_announcements_company_name_20180531.json");

            // Test area (scratch calls kept for manual experiments).
            // Generate the batch commands that convert the PDF files to TXT.
            //PDFToTXT.GetBatchFile();
            // Word segmentation system.
            //WordAnlayze.CompanyAnlayze();
            //UT.RunWordAnlayze();
            //UT.StockChangeTest();
            //UT.IncreaseStockTest();
            //UT.ContractTest();
            //UT.RegularExpress();
            //UT.JianchengTest();
            //Logger.Close();
            //Traning.InitIncreaseStock();
            //StockChange.Extract(Program.DocBase + @"\FDDC_announcements_round1_train_20180518\round1_train_20180518\增减持\html\314146.html");
            //WordAnlayze.segmenter.LoadUserDict(@"Resources\dictAdjust.txt");
            //return;

            // Task switches: flip these to choose which extraction runs are executed.
            var isRunContract     = true;
            var isRunContractTest = false;

            var isRunStockChange     = false;
            var isRunStockChangeTest = false;

            var isRunIncreaseStock     = false;
            var isRunIncreaseStockTest = false;

            // Root folders of the test data sets, one per announcement category.
            var increaseStockPathTest = DocBase + @"\FDDC_announcements_round1_test_a_20180605\定增";
            var contractPathTest      = DocBase + @"\FDDC_announcements_round1_test_a_20180605\重大合同";
            var stockChangePathTest   = DocBase + @"\FDDC_announcements_round1_test_a_20180605\增减持";

            if (isRunContract)
            {
                // Contract processing (training set).
                var contractPathTrain = DocBase + @"\FDDC_announcements_round1_train_20180518\round1_train_20180518\重大合同";
                Console.WriteLine("Start To Extract Info Contract TRAIN");
                var contractResults = new List<struContract>();
                // The using statement disposes (flushes and closes) the writer even if extraction throws.
                using (var resultCsv = new StreamWriter("Result\\hetong_train.csv", false, Encoding.GetEncoding("gb2312")))
                {
                    foreach (var filename in System.IO.Directory.GetFiles(contractPathTrain + @"\html\"))
                    {
                        foreach (var item in Contract.Extract(filename))
                        {
                            contractResults.Add(item);
                            resultCsv.WriteLine(Contract.ConvertToString(item));
                        }
                    }
                }
                // Training data is initialized only after extraction, right before evaluation.
                Traning.InitContract();
                Evaluate.EvaluateContract(contractResults);
                Console.WriteLine("Complete Extract Info Contract");
            }
            if (isRunContractTest)
            {
                // Contract processing (test set): CSV output only, no evaluation.
                using (var resultCsv = new StreamWriter("Result\\hetong.csv", false, Encoding.GetEncoding("gb2312")))
                {
                    Console.WriteLine("Start To Extract Info Contract TEST");
                    foreach (var filename in System.IO.Directory.GetFiles(contractPathTest + @"\html\"))
                    {
                        foreach (var item in Contract.Extract(filename))
                        {
                            resultCsv.WriteLine(Contract.ConvertToString(item));
                        }
                    }
                }
                Console.WriteLine("Complete Extract Info Contract");
            }


            if (isRunStockChange)
            {
                // Shareholding increase/decrease (training set).
                Console.WriteLine("Start To Extract Info StockChange TRAIN");
                var stockChangeResults = new List<struStockChange>();
                using (var resultCsv = new StreamWriter("Result\\zengjianchi_Train.csv", false, Encoding.GetEncoding("gb2312")))
                {
                    var stockChangePathTrain = DocBase + @"\FDDC_announcements_round1_train_20180518\round1_train_20180518\增减持";
                    foreach (var filename in System.IO.Directory.GetFiles(stockChangePathTrain + @"\html\"))
                    {
                        foreach (var item in StockChange.Extract(filename))
                        {
                            stockChangeResults.Add(item);
                            resultCsv.WriteLine(StockChange.ConvertToString(item));
                        }
                    }
                }
                Traning.InitStockChange();
                Evaluate.EvaluateStockChange(stockChangeResults);
                Console.WriteLine("Complete Extract Info StockChange");
            }
            if (isRunStockChangeTest)
            {
                // Shareholding increase/decrease (test set): CSV output only, no evaluation.
                using (var resultCsv = new StreamWriter("Result\\zengjianchi.csv", false, Encoding.GetEncoding("gb2312")))
                {
                    Console.WriteLine("Start To Extract Info StockChange TEST");
                    foreach (var filename in System.IO.Directory.GetFiles(stockChangePathTest + @"\html\"))
                    {
                        foreach (var item in StockChange.Extract(filename))
                        {
                            resultCsv.WriteLine(StockChange.ConvertToString(item));
                        }
                    }
                }
                Console.WriteLine("Complete Extract Info StockChange");
            }

            if (isRunIncreaseStock)
            {
                // Private placement of shares (training set).
                var increaseResults = new List<struIncreaseStock>();
                using (var resultCsv = new StreamWriter("Result\\dingzeng_train.csv", false, Encoding.GetEncoding("gb2312")))
                {
                    var increaseStockPathTrain = DocBase + @"\FDDC_announcements_round1_train_20180518\round1_train_20180518\定增";
                    Console.WriteLine("Start To Extract Info IncreaseStock TRAIN");
                    foreach (var filename in System.IO.Directory.GetFiles(increaseStockPathTrain + @"\html\"))
                    {
                        foreach (var item in IncreaseStock.Extract(filename))
                        {
                            increaseResults.Add(item);
                            resultCsv.WriteLine(IncreaseStock.ConvertToString(item));
                        }
                    }
                }
                Traning.InitIncreaseStock();
                Evaluate.EvaluateIncreaseStock(increaseResults);
                Console.WriteLine("Complete Extract Info IncreaseStock");
            }

            if (isRunIncreaseStockTest)
            {
                // Private placement of shares (test set): CSV output only, no evaluation.
                using (var resultCsv = new StreamWriter("Result\\dingzeng.csv", false, Encoding.GetEncoding("gb2312")))
                {
                    Console.WriteLine("Start To Extract Info IncreaseStock TEST");
                    foreach (var filename in System.IO.Directory.GetFiles(increaseStockPathTest + @"\html\"))
                    {
                        foreach (var item in IncreaseStock.Extract(filename))
                        {
                            resultCsv.WriteLine(IncreaseStock.ConvertToString(item));
                        }
                    }
                }
                Console.WriteLine("Complete Extract Info IncreaseStock");
            }
            Logger.Close();
        }