Beispiel #1
0
    /// <summary>
    /// Passes every record of <c>TraningDataset.IncreaseStockList</c>, together with the parsed
    /// HTML of its announcement, to <c>TableAnlayzeTool.PutTrainingItem</c>, then prints each
    /// entry of <c>TableAnlayzeTool.TrainingTitleResult</c> whose value is at least the
    /// tenth-highest value (or the lowest value when fewer than ten entries exist).
    /// </summary>
    public static void TrainingIncreaseTarget()
    {
        TraningDataset.InitIncreaseStock();
        string previewId   = null;
        var    previewRoot = new HTMLEngine.MyRootHtmlNode();

        foreach (var increase in TraningDataset.IncreaseStockList)
        {
            // Consecutive records may share an announcement id: parse the HTML only when the
            // id differs from the previously parsed one, and remember which id is cached.
            if (!string.Equals(previewId, increase.id))
            {
                var htmlfile = Program.DocBase + @"\FDDC_announcements_round1_train_20180518\round1_train_20180518\定增\html\" + increase.id + ".html";
                previewRoot = HTMLEngine.Anlayze(htmlfile);
                previewId   = increase.id;
            }
            TableAnlayzeTool.PutTrainingItem(previewRoot, increase.PublishTarget);
        }

        var rank = TableAnlayzeTool.TrainingTitleResult.Values.ToList();
        if (rank.Count == 0)
        {
            // Nothing was collected, so there is nothing to rank or print.
            return;
        }

        // Sort descending so that index 9 holds the tenth-highest value.
        rank.Sort();
        rank.Reverse();

        // With fewer than ten entries, fall back to the smallest value so everything is printed.
        var threshold = rank.Count >= 10 ? rank[9] : rank[rank.Count - 1];

        foreach (var title in TableAnlayzeTool.TrainingTitleResult)
        {
            if (title.Value >= threshold)
            {
                Console.WriteLine(title.Key + ":" + title.Value);
            }
        }
    }
Beispiel #2
0
 /// <summary>
 /// Opens the "Training.log" writer, initializes the contract, stock-change and
 /// increase-stock training datasets, runs <c>ContractTraning.Train</c>, and closes the writer.
 /// </summary>
 private static void Traning()
 {
     Training = new StreamWriter("Training.log");
     try
     {
         TraningDataset.InitContract();
         TraningDataset.InitStockChange();
         TraningDataset.InitIncreaseStock();
         ContractTraning.Train();
     }
     finally
     {
         // StreamWriter buffers its output; closing in finally flushes whatever was
         // logged and releases the file handle even when a step above throws.
         Training.Close();
     }
 }
Beispiel #3
0
 /// <summary>
 /// Initializes the contract, stock-change and reorganization training datasets, then
 /// opens and immediately closes the "Training.log" writer. Both training runs are
 /// currently commented out, so nothing is written to the log by this method.
 /// </summary>
 private static void Traning()
 {
     TraningDataset.InitContract();
     TraningDataset.InitStockChange();
     TraningDataset.InitReorganization();
     // The writer is assigned to the Training member so the (disabled) training steps can log to it.
     Training = new StreamWriter("Training.log");
     // Disabled training steps:
     //ContractTraning.Train();
     //ReOrganizationTraning.Train();
     Training.Close();
 }
Beispiel #4
0
    /// <summary>
    /// Walks every file in the training contract "html" directory, and for each file that has
    /// a matching training contract feeds the document plus each non-empty contract field
    /// (JiaFang, YiFang, ProjectName, ContractName) into a per-field <c>LeadingWord</c>
    /// collector. The result of <c>GetTop(5)</c> of each collector is stored in the
    /// corresponding <c>*LeadingDict</c> member.
    /// </summary>
    public static void AnlayzeEntitySurroundWords()
    {
        var contractPathTrain   = Program.DocBase + @"\FDDC_announcements_round1_train_20180518\重大合同";
        var jiaFangLeading      = new LeadingWord();
        var yiFangLeading       = new LeadingWord();
        var projectNameLeading  = new LeadingWord();
        var contractNameLeading = new LeadingWord();

        foreach (var filename in System.IO.Directory.GetFiles(contractPathTrain + @"\html\"))
        {
            // The announcement id is the file name with ".html" stripped.
            var id = new System.IO.FileInfo(filename).Name.Replace(".html", String.Empty);

            // Look the contract up once; files without a training record are skipped.
            var contracts = TraningDataset.GetContractById(id);
            if (contracts.Count == 0)
            {
                continue;
            }
            var contract = contracts.First();
            var doc      = new AnnouceDocument(filename);

            if (!string.IsNullOrEmpty(contract.JiaFang))
            {
                jiaFangLeading.AnlayzeLeadingWord(doc, contract.JiaFang);
            }
            if (!string.IsNullOrEmpty(contract.YiFang))
            {
                yiFangLeading.AnlayzeLeadingWord(doc, contract.YiFang);
            }
            if (!string.IsNullOrEmpty(contract.ProjectName))
            {
                projectNameLeading.AnlayzeLeadingWord(doc, contract.ProjectName);
            }
            if (!string.IsNullOrEmpty(contract.ContractName))
            {
                contractNameLeading.AnlayzeLeadingWord(doc, contract.ContractName);
            }
        }

        JiaFangLeadingDict      = jiaFangLeading.GetTop(5);
        YiFangLeadingDict       = yiFangLeading.GetTop(5);
        ProjectNameLeadingDict  = projectNameLeading.GetTop(5);
        ContractNameLeadingDict = contractNameLeading.GetTop(5);
    }
Beispiel #5
0
 /// <summary>
 /// Program entry point: registers the code-pages encoding provider, loads the company
 /// names and training datasets, runs the contract training statistics, then runs
 /// <c>UT()</c> and <c>Extract()</c> and closes the log writers.
 /// </summary>
 /// <param name="args">Command-line arguments; not read by this method.</param>
 static void Main(string[] args)
 {
     // Batch command that generates the TXT files from the PDFs (disabled)
     //PDFToTXT.GetBatchFile();
     // Initialization
     Encoding.RegisterProvider(CodePagesEncodingProvider.Instance);
     BussinessLogic.LoadCompanyName(@"Resources\FDDC_announcements_company_name_20180531.json");
     TraningDataset.InitContract();
     TraningDataset.InitStockChange();
     TraningDataset.InitIncreaseStock();
     ContractTraning.TraningMaxLenth();
     ContractTraning.EntityWordPerperty();
     // NOTE(review): Training is not assigned in this method; presumably it is opened
     // elsewhere (e.g. a field initializer) before this Close() - confirm.
     Training.Close();
     UT();
     Extract();
     // NOTE(review): Logger and Score are likewise not opened here - confirm where they are created.
     Logger.Close();
     Score.Close();
 }
Beispiel #6
0
    /// <summary>
    /// Runs <c>EntityWordAnlayzeTool.AnlayzeEntitySurroundWords</c> on four fixed sample
    /// announcements, using the ProjectName of the first training contract found for each id.
    /// </summary>
    public static void RunWordAnlayze()
    {
        // Sample announcement ids, processed in this order.
        var sampleIds = new[] { "1044779", "1450", "1042224", "917362" };

        foreach (var sampleId in sampleIds)
        {
            var htmlPath = Program.DocBase + @"\FDDC_announcements_round1_train_20180518\round1_train_20180518\重大合同\html\" + sampleId + ".html";

            // Parse the page first, then fetch the contract, as each stanza did originally.
            var root   = HTMLEngine.Anlayze(htmlPath);
            var sample = TraningDataset.GetContractById(sampleId)[0];

            EntityWordAnlayzeTool.AnlayzeEntitySurroundWords(root, sample.ProjectName);
        }
    }
Beispiel #7
0
    /// <summary>
    /// For every file in the training contract "html" directory that has a matching training
    /// contract with a non-empty JiaFang, parses the HTML and passes the parsed root and the
    /// JiaFang value to <c>EntityWordAnlayzeTool.AnlayzeEntitySurroundWords</c>.
    /// </summary>
    public static void AnlayzeEntitySurroundWords()
    {
        var contractPathTrain = Program.DocBase + @"\FDDC_announcements_round1_train_20180518\round1_train_20180518\重大合同";

        Console.WriteLine("前导词:甲方");
        foreach (var filename in System.IO.Directory.GetFiles(contractPathTrain + @"\html\"))
        {
            // The announcement id is the file name with ".html" stripped.
            var id = new System.IO.FileInfo(filename).Name.Replace(".html", "");

            // Look the contract up once; files without a training record are skipped.
            var contracts = TraningDataset.GetContractById(id);
            if (contracts.Count == 0)
            {
                continue;
            }

            var contract = contracts.First();
            // Skip null as well as empty values so a missing JiaFang never reaches the analysis.
            if (string.IsNullOrEmpty(contract.JiaFang))
            {
                continue;
            }

            var root = HTMLEngine.Anlayze(filename);
            EntityWordAnlayzeTool.AnlayzeEntitySurroundWords(root, contract.JiaFang);
        }
    }
Beispiel #8
0
    /// <summary>
    /// Analyzes the words surrounding the contract entities.
    /// For every file in the training contract "html" directory that has a matching training
    /// contract, feeds the initialized document plus each non-empty field (JiaFang, YiFang,
    /// ProjectName, ContractName) into a per-field <c>Surround</c> collector and a per-field
    /// <c>LeadingColonWord</c> collector. Afterwards each collector is written to
    /// <c>Program.Training</c> and the <c>*LeadingDict</c> members are rebuilt from the top 5
    /// entries of each <c>LeadingWordDict</c>.
    /// </summary>
    public static void AnlayzeEntitySurroundWords()
    {
        var contractPathTrain = Program.DocBase + @"\FDDC_announcements_round1_train_20180518\重大合同";

        var jiaFangSurround      = new Surround();
        var yiFangSurround       = new Surround();
        var projectNameSurround  = new Surround();
        var contractNameSurround = new Surround();

        var jiaFangLeadingColonWord      = new LeadingColonWord();
        var yiFangLeadingColonWord       = new LeadingColonWord();
        var projectNameLeadingColonWord  = new LeadingColonWord();
        var contractNameLeadingColonWord = new LeadingColonWord();

        foreach (var filename in System.IO.Directory.GetFiles(contractPathTrain + @"\html\"))
        {
            // The announcement id is the file name with ".html" stripped.
            var id = new System.IO.FileInfo(filename).Name.Replace(".html", String.Empty);

            // Look the contract up once; files without a training record are skipped.
            var contracts = TraningDataset.GetContractById(id);
            if (contracts.Count == 0)
            {
                continue;
            }
            var contract = contracts.First();
            var doc      = new Contract();
            doc.Init(filename);

            // Evaluate each field's presence once; both analysis passes share the result.
            var hasJiaFang      = !string.IsNullOrEmpty(contract.JiaFang);
            var hasYiFang       = !string.IsNullOrEmpty(contract.YiFang);
            var hasProjectName  = !string.IsNullOrEmpty(contract.ProjectName);
            var hasContractName = !string.IsNullOrEmpty(contract.ContractName);

            // Pass 1: surrounding words.
            if (hasJiaFang)
            {
                jiaFangSurround.AnlayzeEntitySurroundWords(doc, contract.JiaFang);
            }
            if (hasYiFang)
            {
                yiFangSurround.AnlayzeEntitySurroundWords(doc, contract.YiFang);
            }
            if (hasProjectName)
            {
                projectNameSurround.AnlayzeEntitySurroundWords(doc, contract.ProjectName);
            }
            if (hasContractName)
            {
                contractNameSurround.AnlayzeEntitySurroundWords(doc, contract.ContractName);
            }

            // Pass 2: leading words before a colon.
            if (hasJiaFang)
            {
                jiaFangLeadingColonWord.AnlayzeLeadingWord(doc, contract.JiaFang);
            }
            if (hasYiFang)
            {
                yiFangLeadingColonWord.AnlayzeLeadingWord(doc, contract.YiFang);
            }
            if (hasProjectName)
            {
                projectNameLeadingColonWord.AnlayzeLeadingWord(doc, contract.ProjectName);
            }
            if (hasContractName)
            {
                contractNameLeadingColonWord.AnlayzeLeadingWord(doc, contract.ContractName);
            }
        }

        jiaFangSurround.WriteToLog(Program.Training);
        Program.Training.WriteLine("甲方:冒号前导词");
        jiaFangLeadingColonWord.WriteToLog(Program.Training);
        JiaFangLeadingDict = Utility.ConvertRankToCIDict(Utility.FindTop(5, jiaFangLeadingColonWord.LeadingWordDict));

        yiFangSurround.WriteToLog(Program.Training);
        Program.Training.WriteLine("乙方:冒号前导词");
        yiFangLeadingColonWord.WriteToLog(Program.Training);
        YiFangLeadingDict = Utility.ConvertRankToCIDict(Utility.FindTop(5, yiFangLeadingColonWord.LeadingWordDict));

        projectNameSurround.WriteToLog(Program.Training);
        Program.Training.WriteLine("工程名:冒号前导词");
        projectNameLeadingColonWord.WriteToLog(Program.Training);
        ProjectNameLeadingDict = Utility.ConvertRankToCIDict(Utility.FindTop(5, projectNameLeadingColonWord.LeadingWordDict));

        contractNameSurround.WriteToLog(Program.Training);
        Program.Training.WriteLine("合同名:冒号前导词");
        contractNameLeadingColonWord.WriteToLog(Program.Training);
        ContractNameLeadingDict = Utility.ConvertRankToCIDict(Utility.FindTop(5, contractNameLeadingColonWord.LeadingWordDict));
    }
Beispiel #9
0
    /// <summary>
    /// Analyzes the words surrounding the contract entities using LTP.
    /// For every file in the training contract "html" directory that has a matching training
    /// contract, feeds the document's <c>Dplist</c> and <c>Srllist</c> plus each non-empty field
    /// (JiaFang, YiFang, ContractName, ProjectName) into a per-field <c>LTPTrainingDP</c> and
    /// <c>LTPTrainingSRL</c> collector, then writes every collector to <c>Program.Training</c>.
    /// </summary>
    public static void AnlayzeEntitySurroundWordsLTP()
    {
        var contractPathTrain = Program.DocBase + @"\FDDC_announcements_round1_train_20180518\重大合同";

        var jiaFangDP       = new LTPTrainingDP();
        var jiaFangSRL      = new LTPTrainingSRL();
        var yiFangDP        = new LTPTrainingDP();
        var yiFangSRL       = new LTPTrainingSRL();
        var contractNameDP  = new LTPTrainingDP();
        var contractNameSRL = new LTPTrainingSRL();
        var projectNameDP   = new LTPTrainingDP();
        var projectNameSRL  = new LTPTrainingSRL();

        foreach (var filename in System.IO.Directory.GetFiles(contractPathTrain + @"\html\"))
        {
            // The announcement id is the file name with ".html" stripped.
            var id = new System.IO.FileInfo(filename).Name.Replace(".html", String.Empty);

            // Look the contract up once; files without a training record are skipped.
            var contracts = TraningDataset.GetContractById(id);
            if (contracts.Count == 0)
            {
                continue;
            }
            var contract = contracts.First();
            var c        = new Contract();
            c.Init(filename);

            if (!string.IsNullOrEmpty(contract.JiaFang))
            {
                jiaFangDP.Training(c.Dplist, contract.JiaFang);
                jiaFangSRL.Training(c.Srllist, contract.JiaFang);
            }

            if (!string.IsNullOrEmpty(contract.YiFang))
            {
                yiFangDP.Training(c.Dplist, contract.YiFang);
                yiFangSRL.Training(c.Srllist, contract.YiFang);
            }

            if (!string.IsNullOrEmpty(contract.ContractName))
            {
                contractNameDP.Training(c.Dplist, contract.ContractName);
                contractNameSRL.Training(c.Srllist, contract.ContractName);
            }

            if (!string.IsNullOrEmpty(contract.ProjectName))
            {
                projectNameDP.Training(c.Dplist, contract.ProjectName);
                projectNameSRL.Training(c.Srllist, contract.ProjectName);
            }
        }

        Program.Training.WriteLine("甲方附近词语分析(DP):");
        jiaFangDP.WriteToLog(Program.Training);
        Program.Training.WriteLine("甲方附近词语分析(SRL):");
        jiaFangSRL.WriteToLog(Program.Training);

        Program.Training.WriteLine("乙方附近词语分析(DP):");
        yiFangDP.WriteToLog(Program.Training);
        Program.Training.WriteLine("乙方附近词语分析(SRL):");
        yiFangSRL.WriteToLog(Program.Training);

        Program.Training.WriteLine("合同名附近词语分析(DP):");
        contractNameDP.WriteToLog(Program.Training);
        Program.Training.WriteLine("合同名附近词语分析(SRL):");
        contractNameSRL.WriteToLog(Program.Training);

        Program.Training.WriteLine("工程名附近词语分析(DP):");
        projectNameDP.WriteToLog(Program.Training);
        Program.Training.WriteLine("工程名附近词语分析(SRL):");
        projectNameSRL.WriteToLog(Program.Training);
    }