/// <summary>
/// Feeds the "publish target" value of every increase-stock training record to
/// <c>TableAnlayzeTool.PutTrainingItem</c> and prints the most frequent table titles.
/// </summary>
/// <remarks>
/// After all records are processed, every entry of
/// <c>TableAnlayzeTool.TrainingTitleResult</c> whose count is at least the
/// 10th-highest count is written to the console as <c>key:value</c>.
/// When fewer than ten titles exist, all of them are printed; when none exist,
/// nothing is printed.
/// </remarks>
public static void TrainingIncreaseTarget()
{
    TraningDataset.InitIncreaseStock();

    // One-entry cache: the parsed HTML of the most recently seen announcement id.
    var previewId = "";
    var previewRoot = new HTMLEngine.MyRootHtmlNode();
    foreach (var increase in TraningDataset.IncreaseStockList)
    {
        // Parse the announcement only when the id changes, and remember the id so
        // that consecutive records of the same announcement reuse the parsed tree.
        if (!previewId.Equals(increase.id))
        {
            var htmlfile = Program.DocBase + @"\FDDC_announcements_round1_train_20180518\round1_train_20180518\定增\html\" + increase.id + ".html";
            previewRoot = HTMLEngine.Anlayze(htmlfile);
            previewId = increase.id;
        }
        TableAnlayzeTool.PutTrainingItem(previewRoot, increase.PublishTarget);
    }

    List<int> rank = TableAnlayzeTool.TrainingTitleResult.Values.ToList();
    if (rank.Count == 0)
    {
        // Nothing was collected, so there is no threshold to compute.
        return;
    }

    // Descending order: index 9 is the 10th-highest count.
    rank.Sort();
    rank.Reverse();

    // Clamp the index so that fewer than ten titles does not throw.
    var threshold = rank[Math.Min(9, rank.Count - 1)];
    foreach (var title in TableAnlayzeTool.TrainingTitleResult)
    {
        if (title.Value >= threshold)
        {
            Console.WriteLine(title.Key + ":" + title.Value);
        }
    }
}
/// <summary>
/// Opens "Training.log" as the shared <c>Training</c> writer, initializes the
/// contract, stock-change and increase-stock training datasets, runs
/// <c>ContractTraning.Train</c>, and closes the writer.
/// </summary>
private static void Traning()
{
    Training = new StreamWriter("Training.log");
    try
    {
        TraningDataset.InitContract();
        TraningDataset.InitStockChange();
        TraningDataset.InitIncreaseStock();
        ContractTraning.Train();
    }
    finally
    {
        // Close even when initialization or training throws, so the buffered
        // log output is flushed and the file handle is released.
        Training.Close();
    }
}
/// <summary>
/// Runs the contract, stock-change and reorganization initializers of
/// <c>TraningDataset</c>, then creates "Training.log" through the shared
/// <c>Training</c> writer and closes it again. The training passes themselves
/// are currently commented out.
/// </summary>
private static void Traning()
{
    TraningDataset.InitContract();
    TraningDataset.InitStockChange();
    TraningDataset.InitReorganization();

    // The writer is disposed (closed) when the using block ends.
    using (Training = new StreamWriter("Training.log"))
    {
        // Disabled training passes:
        //ContractTraning.Train();
        //ReOrganizationTraning.Train();
    }
}
/// <summary>
/// Runs the leading-word analysis for the labelled contract fields
/// (JiaFang, YiFang, ProjectName, ContractName) over every file in the
/// contract training <c>html</c> folder.
/// </summary>
/// <remarks>
/// Files without a matching training record are skipped, as are empty field
/// values. When the scan is done, <c>GetTop(5)</c> of each collector is stored in
/// <c>JiaFangLeadingDict</c>, <c>YiFangLeadingDict</c>,
/// <c>ProjectNameLeadingDict</c> and <c>ContractNameLeadingDict</c>.
/// Surround-word statistics are not collected by this version.
/// </remarks>
public static void AnlayzeEntitySurroundWords()
{
    var contractTrainPath = Program.DocBase + @"\FDDC_announcements_round1_train_20180518\重大合同";

    var jiaFangLeading = new LeadingWord();
    var yiFangLeading = new LeadingWord();
    var projectNameLeading = new LeadingWord();
    var contractNameLeading = new LeadingWord();

    foreach (var filename in System.IO.Directory.GetFiles(contractTrainPath + @"\html\"))
    {
        // The announcement id is the file name without its ".html" suffix.
        var id = new System.IO.FileInfo(filename).Name.Replace(".html", String.Empty);

        // Look the record up once and reuse the result.
        var contracts = TraningDataset.GetContractById(id);
        if (contracts.Count == 0)
        {
            continue;
        }
        var contract = contracts.First();
        var doc = new AnnouceDocument(filename);

        if (!string.IsNullOrEmpty(contract.JiaFang))
        {
            jiaFangLeading.AnlayzeLeadingWord(doc, contract.JiaFang);
        }
        if (!string.IsNullOrEmpty(contract.YiFang))
        {
            yiFangLeading.AnlayzeLeadingWord(doc, contract.YiFang);
        }
        if (!string.IsNullOrEmpty(contract.ProjectName))
        {
            projectNameLeading.AnlayzeLeadingWord(doc, contract.ProjectName);
        }
        if (!string.IsNullOrEmpty(contract.ContractName))
        {
            contractNameLeading.AnlayzeLeadingWord(doc, contract.ContractName);
        }
    }

    JiaFangLeadingDict = jiaFangLeading.GetTop(5);
    YiFangLeadingDict = yiFangLeading.GetTop(5);
    ProjectNameLeadingDict = projectNameLeading.GetTop(5);
    ContractNameLeadingDict = contractNameLeading.GetTop(5);
}
/// <summary>
/// Program entry point. Registers the code-page encoding provider, loads the
/// company-name resource, initializes the training datasets, runs the two
/// <c>ContractTraning</c> passes, closes <c>Training</c>, then calls
/// <c>UT()</c> and <c>Extract()</c> and closes <c>Logger</c> and <c>Score</c>.
/// </summary>
/// <param name="args">Command-line arguments (not used here).</param>
static void Main(string[] args)
{
    // Batch command that generates the TXT files from the PDFs (disabled)
    //PDFToTXT.GetBatchFile();

    // Initialization
    Encoding.RegisterProvider(CodePagesEncodingProvider.Instance);

    const string companyNameFile = @"Resources\FDDC_announcements_company_name_20180531.json";
    BussinessLogic.LoadCompanyName(companyNameFile);

    TraningDataset.InitContract();
    TraningDataset.InitStockChange();
    TraningDataset.InitIncreaseStock();

    ContractTraning.TraningMaxLenth();
    ContractTraning.EntityWordPerperty();
    Training.Close();

    UT();
    Extract();

    Logger.Close();
    Score.Close();
}
/// <summary>
/// Runs <c>EntityWordAnlayzeTool.AnlayzeEntitySurroundWords</c> on the project
/// name of four fixed sample contracts (ids 1044779, 1450, 1042224, 917362),
/// in that order.
/// </summary>
public static void RunWordAnlayze()
{
    const string htmlFolder = @"\FDDC_announcements_round1_train_20180518\round1_train_20180518\重大合同\html\";
    var sampleIds = new[] { "1044779", "1450", "1042224", "917362" };

    foreach (var sampleId in sampleIds)
    {
        // Parse the announcement first, then fetch its first training record.
        var root = HTMLEngine.Anlayze(Program.DocBase + htmlFolder + sampleId + ".html");
        var contract = TraningDataset.GetContractById(sampleId)[0];
        EntityWordAnlayzeTool.AnlayzeEntitySurroundWords(root, contract.ProjectName);
    }
}
/// <summary>
/// Prints a heading and then runs
/// <c>EntityWordAnlayzeTool.AnlayzeEntitySurroundWords</c> on the JiaFang value
/// of every contract training file that has one.
/// </summary>
/// <remarks>
/// Files without a matching training record, and records whose JiaFang is null
/// or empty, are skipped.
/// </remarks>
public static void AnlayzeEntitySurroundWords()
{
    var contractTrainPath = Program.DocBase + @"\FDDC_announcements_round1_train_20180518\round1_train_20180518\重大合同";
    Console.WriteLine("前导词:甲方");

    foreach (var filename in System.IO.Directory.GetFiles(contractTrainPath + @"\html\"))
    {
        // The announcement id is the file name without its ".html" suffix.
        var id = new System.IO.FileInfo(filename).Name.Replace(".html", "");

        // Look the record up once and reuse the result.
        var contracts = TraningDataset.GetContractById(id);
        if (contracts.Count == 0)
        {
            continue;
        }

        var contract = contracts.First();
        // Skip null as well as empty, so a null value never reaches the analyzer.
        if (string.IsNullOrEmpty(contract.JiaFang))
        {
            continue;
        }

        var root = HTMLEngine.Anlayze(filename);
        EntityWordAnlayzeTool.AnlayzeEntitySurroundWords(root, contract.JiaFang);
    }
}
/// <summary>
/// Analyzes the words surrounding the labelled contract entities.
/// </summary>
/// <remarks>
/// For every file in the contract training <c>html</c> folder that has a
/// training record, the non-empty JiaFang, YiFang, ProjectName and ContractName
/// values are passed to a <c>Surround</c> collector and a
/// <c>LeadingColonWord</c> collector. Afterwards each collector is written to
/// <c>Program.Training</c>, and the result of
/// <c>Utility.ConvertRankToCIDict(Utility.FindTop(5, ...))</c> over each
/// <c>LeadingWordDict</c> is stored in the matching <c>*LeadingDict</c> member.
/// </remarks>
public static void AnlayzeEntitySurroundWords()
{
    var contractTrainPath = Program.DocBase + @"\FDDC_announcements_round1_train_20180518\重大合同";

    var jiaFangSurround = new Surround();
    var yiFangSurround = new Surround();
    var projectNameSurround = new Surround();
    var contractNameSurround = new Surround();

    var jiaFangLeadingColon = new LeadingColonWord();
    var yiFangLeadingColon = new LeadingColonWord();
    var projectNameLeadingColon = new LeadingColonWord();
    var contractNameLeadingColon = new LeadingColonWord();

    foreach (var filename in System.IO.Directory.GetFiles(contractTrainPath + @"\html\"))
    {
        // The announcement id is the file name without its ".html" suffix.
        var id = new System.IO.FileInfo(filename).Name.Replace(".html", String.Empty);

        // Look the record up once and reuse the result.
        var contracts = TraningDataset.GetContractById(id);
        if (contracts.Count == 0)
        {
            continue;
        }
        var contract = contracts.First();

        var doc = new Contract();
        doc.Init(filename);

        // Surround-word pass.
        if (!string.IsNullOrEmpty(contract.JiaFang))
        {
            jiaFangSurround.AnlayzeEntitySurroundWords(doc, contract.JiaFang);
        }
        if (!string.IsNullOrEmpty(contract.YiFang))
        {
            yiFangSurround.AnlayzeEntitySurroundWords(doc, contract.YiFang);
        }
        if (!string.IsNullOrEmpty(contract.ProjectName))
        {
            projectNameSurround.AnlayzeEntitySurroundWords(doc, contract.ProjectName);
        }
        if (!string.IsNullOrEmpty(contract.ContractName))
        {
            contractNameSurround.AnlayzeEntitySurroundWords(doc, contract.ContractName);
        }

        // Leading-word pass; kept after the surround pass so the call order is unchanged.
        if (!string.IsNullOrEmpty(contract.JiaFang))
        {
            jiaFangLeadingColon.AnlayzeLeadingWord(doc, contract.JiaFang);
        }
        if (!string.IsNullOrEmpty(contract.YiFang))
        {
            yiFangLeadingColon.AnlayzeLeadingWord(doc, contract.YiFang);
        }
        if (!string.IsNullOrEmpty(contract.ProjectName))
        {
            projectNameLeadingColon.AnlayzeLeadingWord(doc, contract.ProjectName);
        }
        if (!string.IsNullOrEmpty(contract.ContractName))
        {
            contractNameLeadingColon.AnlayzeLeadingWord(doc, contract.ContractName);
        }
    }

    jiaFangSurround.WriteToLog(Program.Training);
    Program.Training.WriteLine("甲方:冒号前导词");
    jiaFangLeadingColon.WriteToLog(Program.Training);
    JiaFangLeadingDict = Utility.ConvertRankToCIDict(Utility.FindTop(5, jiaFangLeadingColon.LeadingWordDict));

    yiFangSurround.WriteToLog(Program.Training);
    Program.Training.WriteLine("乙方:冒号前导词");
    yiFangLeadingColon.WriteToLog(Program.Training);
    YiFangLeadingDict = Utility.ConvertRankToCIDict(Utility.FindTop(5, yiFangLeadingColon.LeadingWordDict));

    projectNameSurround.WriteToLog(Program.Training);
    Program.Training.WriteLine("工程名:冒号前导词");
    projectNameLeadingColon.WriteToLog(Program.Training);
    ProjectNameLeadingDict = Utility.ConvertRankToCIDict(Utility.FindTop(5, projectNameLeadingColon.LeadingWordDict));

    contractNameSurround.WriteToLog(Program.Training);
    Program.Training.WriteLine("合同名:冒号前导词");
    contractNameLeadingColon.WriteToLog(Program.Training);
    ContractNameLeadingDict = Utility.ConvertRankToCIDict(Utility.FindTop(5, contractNameLeadingColon.LeadingWordDict));
}
/// <summary>
/// Analyzes the words surrounding the labelled contract entities using the LTP approach.
/// </summary>
/// <remarks>
/// For every file in the contract training <c>html</c> folder that has a
/// training record, the document's <c>Dplist</c> and <c>Srllist</c> are passed,
/// together with each non-empty labelled value (JiaFang, YiFang, ContractName,
/// ProjectName), to a <c>LTPTrainingDP</c> and a <c>LTPTrainingSRL</c> collector.
/// All collectors are then written to <c>Program.Training</c>, each under its
/// own heading.
/// </remarks>
public static void AnlayzeEntitySurroundWordsLTP()
{
    var contractTrainPath = Program.DocBase + @"\FDDC_announcements_round1_train_20180518\重大合同";

    var jiaFangDP = new LTPTrainingDP();
    var jiaFangSRL = new LTPTrainingSRL();
    var yiFangDP = new LTPTrainingDP();
    var yiFangSRL = new LTPTrainingSRL();
    var contractNameDP = new LTPTrainingDP();
    var contractNameSRL = new LTPTrainingSRL();
    var projectNameDP = new LTPTrainingDP();
    var projectNameSRL = new LTPTrainingSRL();

    foreach (var filename in System.IO.Directory.GetFiles(contractTrainPath + @"\html\"))
    {
        // The announcement id is the file name without its ".html" suffix.
        var id = new System.IO.FileInfo(filename).Name.Replace(".html", String.Empty);

        // Look the record up once and reuse the result.
        var contracts = TraningDataset.GetContractById(id);
        if (contracts.Count == 0)
        {
            continue;
        }
        var contract = contracts.First();

        var c = new Contract();
        c.Init(filename);

        if (!string.IsNullOrEmpty(contract.JiaFang))
        {
            jiaFangDP.Training(c.Dplist, contract.JiaFang);
            jiaFangSRL.Training(c.Srllist, contract.JiaFang);
        }
        if (!string.IsNullOrEmpty(contract.YiFang))
        {
            yiFangDP.Training(c.Dplist, contract.YiFang);
            yiFangSRL.Training(c.Srllist, contract.YiFang);
        }
        if (!string.IsNullOrEmpty(contract.ContractName))
        {
            contractNameDP.Training(c.Dplist, contract.ContractName);
            contractNameSRL.Training(c.Srllist, contract.ContractName);
        }
        if (!string.IsNullOrEmpty(contract.ProjectName))
        {
            projectNameDP.Training(c.Dplist, contract.ProjectName);
            projectNameSRL.Training(c.Srllist, contract.ProjectName);
        }
    }

    Program.Training.WriteLine("甲方附近词语分析(DP):");
    jiaFangDP.WriteToLog(Program.Training);
    Program.Training.WriteLine("甲方附近词语分析(SRL):");
    jiaFangSRL.WriteToLog(Program.Training);

    Program.Training.WriteLine("乙方附近词语分析(DP):");
    yiFangDP.WriteToLog(Program.Training);
    Program.Training.WriteLine("乙方附近词语分析(SRL):");
    yiFangSRL.WriteToLog(Program.Training);

    Program.Training.WriteLine("合同名附近词语分析(DP):");
    contractNameDP.WriteToLog(Program.Training);
    Program.Training.WriteLine("合同名附近词语分析(SRL):");
    contractNameSRL.WriteToLog(Program.Training);

    Program.Training.WriteLine("工程名附近词语分析(DP):");
    projectNameDP.WriteToLog(Program.Training);
    Program.Training.WriteLine("工程名附近词语分析(SRL):");
    projectNameSRL.WriteToLog(Program.Training);
}