/// <summary>
/// Builds the "rtuniucom" rule assembly (13 rule sets), saves it through
/// <c>Persistence.Rule</c> into <c>e:\downloadrules\</c>, and checks that the
/// resulting XML file exists and contains at least 35 lines.
/// </summary>
public void SaveRuleTuniu()
{
    // ---- Rule set 1: title ----
    BaseRule titleRule = new BeginEndRule(@"<h1>", "</h1>", false, false, true, true)
    {
        RuleNo = 10,
        Name = "标题rule"
    };
    RuleSet titleSet = new RuleSet
    {
        Name = "标题",
        Rules = { titleRule },
        Code = "title"
    };

    // ---- Rule set 2: level ----
    BaseRule levelRule = new BeginEndRule("<span class=\"orange02\" >", "</span>", false, false, true, true)
    {
        RuleNo = 10,
        Name = "等级rule"
    };
    RuleSet levelSet = new RuleSet
    {
        Name = "等级",
        Code = "level",
        Rules = { levelRule }
    };

    // ---- Rule set 3: scenic address (with a regex replacement pair) ----
    BaseRule addressRule = new BeginEndRule("<span class=\"misp2\">景点地址:", "</span>", false, false, true, true)
    {
        RuleNo = 10,
        Name = "景区地址rule"
    };
    RuleSet addressSet = new RuleSet
    {
        Name = "景区地址",
        Rules = { addressRule },
        Code = "scenicadd",
        OldRegex = new List<string> { @"入园凭证:.*\s*.*\s*.*" },
        NewRegex = new List<string> { @"取票凭证:</span><span class=""info_content""><p>凭身份证到景区售票窗口取票入园。</p>" }
    };

    // ---- Rule set 4: seoname (placeholder begin/end marks) ----
    BaseRule seoPlaceholderRule = new BeginEndRule("fdsafsd", "fdasfsd", false, false, true, true)
    {
        RuleNo = 10,
        Name = "seonamerule"
    };
    RuleSet seoPlaceholderSet = new RuleSet
    {
        Name = "seoname",
        Rules = { seoPlaceholderRule },
        Code = "seoname"
    };

    // ---- Rule set 5: seoname built from three identical navigation rules ----
    BaseRule navRule0 = new BeginEndRule("<a name=\"nav_open\" class=\"n_nav_common\" href=\"javascript:void(0);\">", "<s></s>", false, false, true, true)
    {
        RuleNo = 10,
        Name = "seonamerule0"
    };
    BaseRule navRule1 = new BeginEndRule("<a name=\"nav_open\" class=\"n_nav_common\" href=\"javascript:void(0);\">", "<s></s>", false, false, true, true)
    {
        RuleNo = 12,
        Name = "seonamerule1"
    };
    BaseRule navRule2 = new BeginEndRule("<a name=\"nav_open\" class=\"n_nav_common\" href=\"javascript:void(0);\">", "<s></s>", false, false, true, true)
    {
        RuleNo = 12,
        Name = "seonamerule2"
    };
    RuleSet navSeoSet = new RuleSet
    {
        Name = "seoname",
        Rules = { navRule0, navRule1, navRule2 },
        Code = "seoname"
    };

    // ---- Rule set 6: scenic topic (placeholder marks) ----
    BaseRule topicRule = new BeginEndRule("fdsafsdfsd", "fdsafsdfsd", false, false, true, true)
    {
        RuleNo = 10,
        Name = "景区主题rule"
    };
    RuleSet topicSet = new RuleSet
    {
        Name = "景区主题",
        Rules = { topicRule },
        Code = "scenictopic"
    };

    // ---- Rule set 7: topic seo (placeholder marks and placeholder code) ----
    BaseRule topicSeoRule = new BeginEndRule("fdsafsdfsd", "fdsafsdfsd", false, false, true, true)
    {
        RuleNo = 10,
        Name = "topicseorule"
    };
    RuleSet topicSeoSet = new RuleSet
    {
        Name = "topicseo",
        Rules = { topicSeoRule },
        Code = "fdsafsdfsd"
    };

    // ---- Rule set 8: traffic guide ----
    BaseRule trafficRule = new BeginEndRule("<ul class=\"traffic_infor\">", "</ul>", false, false, true, true)
    {
        RuleNo = 10,
        Name = "交通指南rule"
    };
    RuleSet trafficSet = new RuleSet
    {
        Name = "交通指南",
        Rules = { trafficRule },
        Code = "trafficeintro",
        OldRegex = new List<string> { "同程" },
        NewRegex = new List<string> { "旅游在线" }
    };

    // ---- Rule set 9: booking instructions ----
    BaseRule bookingRule = new BeginEndRule("<h2 id=\"ydxz\" class=\"detail_h2\"><span>(*)</span></h2>", @"</div> <div", true, false, true, true)
    {
        RuleNo = 10,
        Name = "订票说明rule"
    };
    RuleSet bookingSet = new RuleSet
    {
        Name = "订票说明",
        Rules = { bookingRule },
        Code = "bookintro",
        OldRegex = new List<string> { "同程", @"入园凭证:.*\s*.*\s*.*" },
        NewRegex = new List<string> { "旅游在线", @"取票凭证:</span><span class=""info_content""><p>凭身份证到景区售票窗口取票入园。</p>" }
    };

    // ---- Rule set 10: scenic detail (images localized to disk) ----
    BaseRule detailRule = new BeginEndRule("<h2 id=\"jqjs\" class=\"detail_h2\"><span>(*)</span></h2>", @"</div> <div", true, false, true, true)
    {
        RuleNo = 10,
        Name = "景区详情rule"
    };
    RuleSet detailSet = new RuleSet
    {
        Name = "景区详情",
        Rules = { detailRule },
        Code = "scenicdetail",
        NeedImageLocalizer = true,
        ImageLocalizerType = "detailimg",
        OldRegex = new List<string> { "同程" },
        NewRegex = new List<string> { "旅游在线" },
        ImagePath = @"e:\testDetailimgLocalizer\",
        VirtualPath = "/scenicimg/detailimg"
    };

    // ---- Rule set 11: scenic introduction ----
    BaseRule introRule = new BeginEndRule("fdsafsdfsd", "</h1>", false, false, true, true)
    {
        RuleNo = 10,
        Name = "景区简介rule"
    };
    RuleSet introSet = new RuleSet
    {
        Name = "景区简介",
        Rules = { introRule },
        Code = "scenicintro",
        OldRegex = new List<string> { "同程" },
        NewRegex = new List<string> { "旅游在线" }
    };

    // ---- Rule set 12: price, extracted with a regular expression ----
    string pricePattern = @"id=""se_title_\d+"">.*?<span>(?<t_name>.*?)</span>.*?""parGd"">.?(?<t_price1>\d+)</span>.*?""Mne"">.</span>(?<price2>\d+)</dt>";
    BaseRule priceRule = new RegexRule(pricePattern)
    {
        RuleNo = 10,
        Name = "价格rule"
    };
    RuleSet priceSet = new RuleSet
    {
        Name = "价格",
        Rules = { priceRule },
        Code = "scenicprice"
    };

    // ---- Rule set 13: main image (images localized to disk) ----
    BaseRule mainImageRule = new BeginEndRule(@"<ul class=""oUl"">", @"</ul>", false, false, true, true)
    {
        RuleNo = 10,
        Name = "主图rule"
    };
    RuleSet mainImageSet = new RuleSet
    {
        Name = "主图",
        Rules = { mainImageRule },
        Code = "mainimg",
        NeedImageLocalizer = true,
        ImageLocalizerType = "mainimg",
        ImagePath = @"e:\testMainimgLocalizer\",
        VirtualPath = "/scenicimg/mainimg"
    };

    // ---- Assemble: number the sets 11..23, then register them in that order ----
    RuleAssembly assembly = new RuleAssembly
    {
        CodeName = "Ass",
        Name = "rtuniucom"
    };

    titleSet.SetNo = 11;
    levelSet.SetNo = 12;
    addressSet.SetNo = 13;
    seoPlaceholderSet.SetNo = 14;
    navSeoSet.SetNo = 15;
    topicSet.SetNo = 16;
    topicSeoSet.SetNo = 17;
    trafficSet.SetNo = 18;
    bookingSet.SetNo = 19;
    detailSet.SetNo = 20;
    introSet.SetNo = 21;
    priceSet.SetNo = 22;
    mainImageSet.SetNo = 23;

    RuleSet[] orderedSets =
    {
        titleSet, levelSet, addressSet, seoPlaceholderSet, navSeoSet,
        topicSet, topicSeoSet, trafficSet, bookingSet, detailSet,
        introSet, priceSet, mainImageSet
    };
    foreach (RuleSet orderedSet in orderedSets)
    {
        assembly.RuleSets.Add(orderedSet);
    }

    // ---- Persist ----
    IPersistence.IRule persistence = new Persistence.Rule();
    persistence.PersistencePath = @"e:\downloadrules\";
    persistence.SaveRule(assembly);

    string xmlPath = @"e:\downloadrules\" + assembly.Name + ".xml";

    // The saved file must exist.
    Assert.IsTrue(File.Exists(xmlPath));

    // The file must reach the minimum line count. The original author's estimate:
    // xml[1] + assembly[2] + rulesetNum[z] * (ruleProperty[x] + rulesetProperty[y]),
    // with x = 12, y = 4, z = 2, which gives 35.
    string[] xmlLines = File.ReadAllLines(xmlPath);
    Assert.GreaterOrEqual(xmlLines.Length, 35);

    // NOTE: a check that line 12 of the file equals "<RuleNo>10</RuleNo>" was disabled
    // in this test and remains disabled.
}
/// <summary>
/// Round-trip check for rule persistence: saves a three-rule-set assembly
/// ("tongchengcom") under <c>d:\</c>, reads it back by name, and verifies the
/// first rule's properties and the concrete rule types that were restored.
/// </summary>
public void ReadRule()
{
    IPersistence.IRule rule = new Persistence.Rule();
    rule.PersistencePath = @"d:\";

    // ---- Write an xml file ----

    // Set 1: title (begin/end rule).
    string titleBeginMark = "<div id=\"jqlast_maincontent\" class=\"jqlast_main_title\"><h1>";
    BaseRule rule1 = new BeginEndRule(titleBeginMark, "</h1>", false, false, true, true);
    rule1.RuleNo = 10;
    rule1.Name = "标题rule";
    RuleSet ruleset = new RuleSet();
    ruleset.Name = "标题";
    ruleset.Rules.Add(rule1);
    ruleset.Code = "title";

    // Set 2: level (begin/end rule).
    BaseRule rule2 = new BeginEndRule("<span class=\"grade\">", "</span>", false, false, true, true);
    rule2.RuleNo = 10;
    rule2.Name = "等级rule";
    RuleSet ruleset2 = new RuleSet();
    ruleset2.Name = "等级";
    ruleset2.Code = "level";
    ruleset2.Rules.Add(rule2);

    // Set 3: price (regex rule).
    string regexExp = @"id=""se_title_\d+"">.*?<span>(?<t_name>.*?)</span>.*?""parGd"">.?(?<t_price1>\d+)</span>.*?""Mne"">.</span>(?<price2>\d+)</dt>";
    BaseRule rule3 = new RegexRule(regexExp);
    rule3.RuleNo = 10;
    rule3.Name = "价格rule";
    RuleSet ruleset3 = new RuleSet();
    ruleset3.Name = "价格";
    ruleset3.Code = "price";
    ruleset3.Rules.Add(rule3);

    RuleAssembly assm = new RuleAssembly();
    assm.CodeName = "Ass";
    assm.Name = "tongchengcom";
    ruleset.SetNo = 10;
    ruleset2.SetNo = 11;
    ruleset3.SetNo = 12;
    assm.RuleSets.Add(ruleset);
    assm.RuleSets.Add(ruleset2);
    assm.RuleSets.Add(ruleset3);

    rule.SaveRule(assm);

    // ---- Read the xml file back ----
    CE.Domain.Rule.RuleAssembly ra = rule.ReadRule(assm.Name);

    // NUnit's AreEqual takes (expected, actual); keeping that order makes
    // failure messages report the right side as "expected".
    CE.Domain.Rule.BeginEndRule ber = (CE.Domain.Rule.BeginEndRule)ra.RuleSets[0].Rules[0];
    Assert.AreEqual("标题rule", ber.Name);
    Assert.AreEqual("10", ber.RuleNo.ToString());
    Assert.AreEqual("True", ber.Enabled.ToString());
    Assert.AreEqual(titleBeginMark, ber.BeginMark);
    Assert.AreEqual("</h1>", ber.EndMark);

    // The first rule must come back as a BeginEndRule...
    CE.Domain.Rule.BeginEndRule ber1 = ra.RuleSets[0].Rules[0] as CE.Domain.Rule.BeginEndRule;
    Assert.IsNotNull(ber1);

    // ...and the third set's rule as a RegexRule. (Previously ber1 was asserted
    // a second time here, so a failed RegexRule cast went unnoticed.)
    CE.Domain.Rule.RegexRule ber2 = ra.RuleSets[2].Rules[0] as CE.Domain.Rule.RegexRule;
    Assert.IsNotNull(ber2);
}
/// <summary>
/// Builds the "tongcheng" rule assembly (two rule sets: title and level), saves it
/// through <c>Persistence.Rule</c> into <c>d:\</c>, and checks that the XML file
/// exists, has at least 35 lines, and that its 12th line is the rule number element.
/// </summary>
public void SaveRuleToncheng()
{
    // ---- Rule set 1: title ----
    BaseRule titleRule = new BeginEndRule("<div id=\"jqlast_maincontent\" class=\"jqlast_main_title\"><h1>", "</h1>", false, false, true, true)
    {
        RuleNo = 10,
        Name = "标题rule"
    };
    RuleSet titleSet = new RuleSet
    {
        Name = "标题",
        Rules = { titleRule },
        Code = "title"
    };

    // ---- Rule set 2: level ----
    BaseRule levelRule = new BeginEndRule("<span class=\"grade\">", "</span>", false, false, true, true)
    {
        RuleNo = 10,
        Name = "等级rule"
    };
    RuleSet levelSet = new RuleSet
    {
        Name = "等级",
        Code = "level",
        Rules = { levelRule }
    };

    // ---- Assemble: number the sets, then register them in order ----
    RuleAssembly assembly = new RuleAssembly
    {
        CodeName = "Ass",
        Name = "tongcheng"
    };
    titleSet.SetNo = 10;
    levelSet.SetNo = 11;
    assembly.RuleSets.Add(titleSet);
    assembly.RuleSets.Add(levelSet);

    // ---- Persist ----
    IPersistence.IRule persistence = new Persistence.Rule();
    persistence.PersistencePath = @"d:\";
    persistence.SaveRule(assembly);

    string xmlPath = @"d:\" + assembly.Name + ".xml";

    // The saved file must exist.
    Assert.IsTrue(File.Exists(xmlPath));

    // The file must reach the minimum line count. The original author's estimate:
    // xml[1] + assembly[2] + rulesetNum[z] * (ruleProperty[x] + rulesetProperty[y]),
    // with x = 12, y = 4, z = 2, which gives 35.
    string[] xmlLines = File.ReadAllLines(xmlPath);
    Assert.GreaterOrEqual(xmlLines.Length, 35);

    // Line 12 (index 11) of the file must be the rule number element.
    Assert.AreEqual("<RuleNo>10</RuleNo>", xmlLines[11].Trim());
}
/// <summary>
/// Runs every file found directly inside <paramref name="htmlPath"/> through the rule
/// assembly stored at <paramref name="rulePath"/> and hands each filtered result to
/// <c>ExcelOpr.Persistence2Excel</c>.
/// </summary>
/// <param name="htmlPath">Directory holding the HTML files; the method returns without doing anything when it does not exist.</param>
/// <param name="rulePath">Path of the rule file. Its directory becomes the persistence path and its file name (without extension) the rule name to read.</param>
/// <param name="savePath">Passed through unchanged to <c>Persistence2Excel</c>.</param>
/// <param name="savePricePath">Passed through unchanged to <c>Persistence2Excel</c>.</param>
public void Persistence2Excel8Html(string htmlPath, string rulePath, string savePath, string savePricePath)
{
    // Nothing to process when the HTML source directory is missing.
    if (!Directory.Exists(htmlPath))
    {
        return;
    }

    // Collect files only. GetFileSystemEntries would also return sub-directory
    // paths, which would then be handed to ReadHtml as if they were HTML files.
    string[] htmlFiles = Directory.GetFiles(htmlPath);

    // Load the rule assembly identified by rulePath.
    IRule rule = new Persistence.Rule();
    rule.PersistencePath = Path.GetDirectoryName(rulePath);
    ruleassembly = rule.ReadRule(Path.GetFileNameWithoutExtension(rulePath));

    ExcelOpr.ExcelOpr excelopr = new ExcelOpr.ExcelOpr();
    for (int i = 0; i < htmlFiles.Length; i++)
    {
        string html = htmlHandler.ReadHtml(htmlFiles[i]);
        string result = ruleassembly.FilterUsingAssembly(html, false);

        // NOTE(review): i + 2 is presumably the target row index (skipping a
        // header row) — confirm against ExcelOpr.Persistence2Excel.
        excelopr.Persistence2Excel(i + 2, result, savePath, savePricePath);
    }
}