/// <summary>
/// Timing harness: reads "03,073.txt", runs the nested Reaper extraction
/// (part, then table, then line, then data cell) without emitting any of the
/// extracted values, and prints the elapsed time to the console.
/// </summary>
static void test4()
{
    // NOTE(review): the sibling tests read "03,073.html"; confirm the ".txt" name is intentional.
    string originString;
    using (StreamReader reader = new StreamReader("03,073.txt"))
    {
        // Dispose the reader so the input file handle is released before timing starts.
        originString = reader.ReadToEnd();
    }

    // Stopwatch is monotonic and finer-grained than subtracting two DateTime.Now values.
    System.Diagnostics.Stopwatch timer = System.Diagnostics.Stopwatch.StartNew();

    Reaper reaper = new Reaper(originString);
    foreach (Reaper part in reaper
        .RemainBeforeFirst("<table width=\"100%\" summary=\"table used for formatting\"><tr><td>")
        .ReapByProfix("<hr><big><b><i>"))
    {
        // The name locals are unused on purpose: the calls are part of the timed workload.
        string partName = part.RemainBeforeFirst(":</i></b></big>").GetResult()[0];

        foreach (Reaper table in part.ReapByProfix("<div align=\"center\"><table border=1 summary=\""))
        {
            string tableName = table.RemainBeforeFirst("\" width=\"95%\">").GetResult()[0];

            foreach (Reaper line in table
                .ReapByProfix("<tr><td>")
                .GiveUpContain("<td>Jan</td><td>Feb</td><td>Mar</td>"))
            {
                string lineName = line.RemainBeforeFirst("</td>").GetResult()[0];

                foreach (Reaper data in line
                    .ReapByProfix("<td align=\"center\" nowrap>")
                    .RemainBeforeFirst("</td>"))
                {
                    // Intentionally empty: enumerating is the work being timed.
                }
            }
        }
    }

    timer.Stop();
    Console.WriteLine(timer.Elapsed);
}
/// <summary>
/// Reads "03,073.html", enumerates the nested Reaper results (part, then
/// table, then line, then data cell) and writes one labelled record per line
/// to "test3_output.txt". Each part header is also passed to ShowStrings.
/// </summary>
static void test3()
{
    string html;
    using (StreamReader reader = new StreamReader("03,073.html"))
    {
        // Dispose the reader so the input file handle is not leaked.
        html = reader.ReadToEnd();
    }

    Reaper reaper = new Reaper(html);

    // The using block flushes and closes the output even if extraction throws midway.
    using (StreamWriter outputFile = new StreamWriter("test3_output.txt"))
    {
        foreach (Reaper part in reaper
            .RemainBeforeFirst("<table width=\"100%\" summary=\"table used for formatting\"><tr><td>")
            .ReapByProfix("<hr><big><b><i>"))
        {
            // Extract the header once and reuse it for both the name and the ShowStrings call.
            var partHeader = part.RemainBeforeFirst(":</i></b></big>").GetResult();
            string partName = partHeader[0];
            ShowStrings(partHeader);

            foreach (Reaper table in part.ReapByProfix("<div align=\"center\"><table border=1 summary=\""))
            {
                string tableName = table.RemainBeforeFirst("\" width=\"95%\">").GetResult()[0];
                int lineCount = 0;

                foreach (Reaper line in table
                    .ReapByProfix("<tr><td>")
                    .GiveUpContain("<td>Jan</td><td>Feb</td><td>Mar</td>"))
                {
                    string lineName = line.RemainBeforeFirst("</td>").GetResult()[0];

                    outputFile.WriteLine("PART:" + partName);
                    outputFile.WriteLine("TABLE:" + tableName);
                    outputFile.WriteLine("LINE:" + lineName);
                    outputFile.WriteLine("NO.:" + ++lineCount);

                    foreach (Reaper data in line
                        .ReapByProfix("<td align=\"center\" nowrap>")
                        .RemainBeforeFirst("</td>"))
                    {
                        outputFile.Write(data.GetResult()[0] + " ");
                    }
                    outputFile.WriteLine();
                }
            }
        }
    }
}
/// <summary>
/// Same extraction as the Reaper-enumeration tests, but driven through the
/// string lists returned by GetResult: each extracted string is wrapped in a
/// new Reaper for the next level. Output goes to "test1_output.txt", and each
/// part header is also passed to ShowStrings.
/// </summary>
static void test1()
{
    string html;
    using (StreamReader reader = new StreamReader("03,073.html"))
    {
        // Dispose the reader so the input file handle is not leaked.
        html = reader.ReadToEnd();
    }

    Reaper reaper = new Reaper(html);

    // The using block flushes and closes the output even if extraction throws midway.
    using (StreamWriter outputFile = new StreamWriter("test1_output.txt"))
    {
        List<string> partResult = reaper
            .RemainBeforeFirst("<table width=\"100%\" summary=\"table used for formatting\"><tr><td>")
            .ReapByProfix("<hr><big><b><i>")
            .GetResult();

        foreach (var part in partResult)
        {
            // One Reaper per extracted string; the sibling tests query a single
            // Reaper repeatedly, so the queries are assumed not to mutate it.
            Reaper partReaper = new Reaper(part);

            var partHeader = partReaper.RemainBeforeFirst(":</i></b></big>").GetResult();
            string partName = partHeader[0];
            ShowStrings(partHeader);
            outputFile.WriteLine("----------PART:" + partName);

            List<string> tableResult = partReaper
                .ReapByProfix("<div align=\"center\"><table border=1 summary=\"")
                .GetResult();

            foreach (var table in tableResult)
            {
                Reaper tableReaper = new Reaper(table);

                string tableName = tableReaper.RemainBeforeFirst("\" width=\"95%\">").GetResult()[0];
                outputFile.WriteLine("-----TABLE:" + tableName);

                List<string> lineResult = tableReaper
                    .ReapByProfix("<tr><td>")
                    .GiveUpContain("<td>Jan</td><td>Feb</td><td>Mar</td>")
                    .GetResult();

                int lineCount = 0;
                foreach (var line in lineResult)
                {
                    Reaper lineReaper = new Reaper(line);

                    string lineName = lineReaper.RemainBeforeFirst("</td>").GetResult()[0];
                    outputFile.WriteLine("LINE:" + lineName + " " + ++lineCount);

                    List<string> dataResult = lineReaper
                        .ReapByProfix("<td align=\"center\" nowrap>")
                        .RemainBeforeFirst("</td>")
                        .GetResult();

                    foreach (var data in dataResult)
                    {
                        outputFile.Write(data + " ");
                    }
                    outputFile.WriteLine();
                }
            }
        }
    }
}
/// <summary>
/// Extracts data from the HTML file named by _inFileName and writes a
/// line-oriented text report to _outFileName. The report starts with a
/// "LATLON:" line, followed by PART / TABLE / LINE / NUM / DATA records,
/// one group per table row.
/// </summary>
/// <returns>
/// Always true. A failure while writing the output file is logged to
/// Program.errLog and does not change the return value.
/// </returns>
public bool 提取()
{
    StringWriter strWriter = new StringWriter();

    string originString;
    using (StreamReader reader = new StreamReader(_inFileName))
    {
        // Dispose the reader so the input file handle is not leaked.
        originString = reader.ReadToEnd();
    }

    // Workaround for inconsistent markup: some row names start with
    // </tr>\n<tr><td align="center"> while most start with </tr>\n<tr><td>.
    // Normalise to the common form so one prefix matches every row.
    originString = originString.Replace("</tr>\n<tr><td align=\"center\">", "</tr>\n<tr><td>");

    Reaper reaper = new Reaper(originString);
    string latStr = reaper.RemainAfterFirst("<br>Latitude <b>").RemainBeforeFirst("</b>").GetResult()[0];
    string lonStr = reaper.RemainAfterFirst("<br>Longitude <b>").RemainBeforeFirst("</b>").GetResult()[0];

    strWriter.Write("LATLON:");
    strWriter.WriteLine(latStr + " " + lonStr);

    foreach (Reaper part in reaper
        .RemainBeforeFirst("<table width=\"100%\" summary=\"table used for formatting\"><tr><td>")
        .ReapByProfix("<hr><big><b><i>"))
    {
        string partName = part.RemainBeforeFirst(":</i></b></big>").GetResult()[0];

        foreach (Reaper table in part.ReapByProfix("<div align=\"center\"><table border=1 summary=\""))
        {
            string tableName = table.RemainBeforeFirst("\" width=\"95%\">").GetResult()[0];
            int lineCount = 0;

            foreach (Reaper line in table
                .ReapByProfix("<tr><td>")
                .GiveUpContain("<td>Jan</td><td>Feb</td><td>Mar</td>"))
            {
                string lineName = line.RemainBeforeFirst("</td>").GetResult()[0];

                strWriter.WriteLine("PART:" + partName);
                strWriter.WriteLine("TABLE:" + tableName);
                strWriter.WriteLine("LINE:" + lineName);
                strWriter.WriteLine("NUM:" + ++lineCount);
                strWriter.Write("DATA:");

                foreach (Reaper data in line
                    .ReapByProfix("<td align=\"center\" nowrap>")
                    .RemainBeforeFirst("</td>"))
                {
                    strWriter.Write(data.GetResult()[0] + " ");
                }
                strWriter.WriteLine();
            }
        }
    }

    // Decode the HTML entities that occur in the extracted text. The search
    // strings must be the entity forms; replacing a character with itself does nothing.
    string result = strWriter.ToString();
    result = result.Replace("&lt;", "<");
    result = result.Replace("&gt;", ">");
    result = result.Replace("&deg;", "°");

    // Strip the extra "* " marker that shows up for "Average Daily Temperature Range".
    result = result.Replace("* ", "");

    try
    {
        // The using block releases the output handle even if Write throws.
        using (StreamWriter outFile = new StreamWriter(_outFileName))
        {
            outFile.Write(result);
        }
    }
    catch (Exception ex)
    {
        Program.errLog.WriteLine(ex.Message);
        Program.errLog.Flush();
    }

    // NOTE(review): true is returned even after a logged write failure; kept for caller compatibility.
    return true;
}
/// <summary>
/// Extracts data from the HTML file named by _inFileName and writes a
/// line-oriented text report to _outFileName. The report starts with a
/// "LATLON:" line, followed by PART / TABLE / LINE / NUM / DATA records,
/// one group per table row.
/// </summary>
/// <returns>
/// Always true. A failure while writing the output file is logged to
/// Program.errLog and does not change the return value.
/// </returns>
public bool 提取()
{
    StringWriter strWriter = new StringWriter();

    string originString;
    using (StreamReader input = new StreamReader(_inFileName))
    {
        // Dispose the reader so the input file handle is not leaked.
        originString = input.ReadToEnd();
    }

    // Workaround for inconsistent markup: some row names start with
    // </tr>\n<tr><td align="center"> while most start with </tr>\n<tr><td>.
    // Normalise to the common form so one prefix matches every row.
    originString = originString.Replace("</tr>\n<tr><td align=\"center\">", "</tr>\n<tr><td>");

    Reaper reaper = new Reaper(originString);
    string latStr = reaper.RemainAfterFirst("<br>Latitude <b>").RemainBeforeFirst("</b>").GetResult()[0];
    string lonStr = reaper.RemainAfterFirst("<br>Longitude <b>").RemainBeforeFirst("</b>").GetResult()[0];

    strWriter.Write("LATLON:");
    strWriter.WriteLine(latStr + " " + lonStr);

    foreach (Reaper part in reaper
        .RemainBeforeFirst("<table width=\"100%\" summary=\"table used for formatting\"><tr><td>")
        .ReapByProfix("<hr><big><b><i>"))
    {
        string partName = part.RemainBeforeFirst(":</i></b></big>").GetResult()[0];

        foreach (Reaper table in part.ReapByProfix("<div align=\"center\"><table border=1 summary=\""))
        {
            string tableName = table.RemainBeforeFirst("\" width=\"95%\">").GetResult()[0];
            int lineCount = 0;

            foreach (Reaper line in table
                .ReapByProfix("<tr><td>")
                .GiveUpContain("<td>Jan</td><td>Feb</td><td>Mar</td>"))
            {
                string lineName = line.RemainBeforeFirst("</td>").GetResult()[0];

                strWriter.WriteLine("PART:" + partName);
                strWriter.WriteLine("TABLE:" + tableName);
                strWriter.WriteLine("LINE:" + lineName);
                strWriter.WriteLine("NUM:" + ++lineCount);
                strWriter.Write("DATA:");

                foreach (Reaper data in line
                    .ReapByProfix("<td align=\"center\" nowrap>")
                    .RemainBeforeFirst("</td>"))
                {
                    strWriter.Write(data.GetResult()[0] + " ");
                }
                strWriter.WriteLine();
            }
        }
    }

    // Decode the HTML entities that occur in the extracted text. The search
    // strings must be the entity forms; replacing a character with itself does nothing.
    string result = strWriter.ToString();
    result = result.Replace("&lt;", "<");
    result = result.Replace("&gt;", ">");
    result = result.Replace("&deg;", "°");

    // Strip the extra "* " marker that shows up for "Average Daily Temperature Range".
    result = result.Replace("* ", "");

    try
    {
        // The using block releases the output handle even if Write throws.
        using (StreamWriter output = new StreamWriter(_outFileName))
        {
            output.Write(result);
        }
    }
    catch (Exception ex)
    {
        Program.errLog.WriteLine(ex.Message);
        Program.errLog.Flush();
    }

    // NOTE(review): true is returned even after a logged write failure; kept for caller compatibility.
    return true;
}