/// <summary>
/// Runs the entity surrounding-word analysis on the project name of four
/// hard-coded sample contract announcements from the round-1 training set.
/// </summary>
/// <remarks>
/// For each sample id, in order: the announcement HTML is parsed with
/// <c>HTMLEngine.Anlayze</c>, the first record returned by
/// <c>TraningDataset.GetContractById</c> is taken, and its <c>ProjectName</c>
/// is passed to <c>EntityWordAnlayzeTool.AnlayzeEntitySurroundWords</c>.
/// The first record is indexed without a guard, so every sample id is assumed
/// to have at least one training record.
/// </remarks>
public static void RunWordAnlayze()
{
    // Sample announcement ids; processed in exactly this order.
    string[] sampleIds = { "1044779", "1450", "1042224", "917362" };

    foreach (var sampleId in sampleIds)
    {
        var htmlPath = Program.DocBase
            + @"\FDDC_announcements_round1_train_20180518\round1_train_20180518\重大合同\html\"
            + sampleId
            + ".html";

        // Parse the document first, then look up the labelled record.
        var document = HTMLEngine.Anlayze(htmlPath);
        var record = TraningDataset.GetContractById(sampleId)[0];

        EntityWordAnlayzeTool.AnlayzeEntitySurroundWords(document, record.ProjectName);
    }
}
/// <summary>
/// Runs the entity surrounding-word analysis for the <c>JiaFang</c> field of
/// every training contract that has an HTML announcement in the round-1
/// training directory.
/// </summary>
/// <remarks>
/// Writes a header line to the console, then enumerates the files in the
/// <c>html</c> sub-directory. For each file the contract id is the file name
/// with <c>.html</c> removed. Files with no matching training record, or whose
/// first record has a null or empty <c>JiaFang</c>, are skipped. Otherwise the
/// file is parsed with <c>HTMLEngine.Anlayze</c> and handed to
/// <c>EntityWordAnlayzeTool.AnlayzeEntitySurroundWords</c>.
/// </remarks>
public static void AnlayzeEntitySurroundWords()
{
    var ContractPath_TRAIN = Program.DocBase + @"\FDDC_announcements_round1_train_20180518\round1_train_20180518\重大合同";
    Console.WriteLine("前导词:甲方");

    foreach (var filename in System.IO.Directory.GetFiles(ContractPath_TRAIN + @"\html\"))
    {
        // The contract id is the file name without its ".html" suffix.
        var fi = new System.IO.FileInfo(filename);
        var Id = fi.Name.Replace(".html", "");

        // Look the record up once and reuse it for both the emptiness check
        // and the element access (previously two separate lookups per file).
        var contracts = TraningDataset.GetContractById(Id);
        if (contracts.Count == 0)
        {
            continue;
        }

        var contract = contracts.First();

        // Skip records with no JiaFang value; a null value is treated the same
        // as an empty one so the analyzer never receives a missing entity.
        if (string.IsNullOrEmpty(contract.JiaFang))
        {
            continue;
        }

        var root = HTMLEngine.Anlayze(filename);
        EntityWordAnlayzeTool.AnlayzeEntitySurroundWords(root, contract.JiaFang);
    }
}