/// <summary>
/// Builds an RSS document from the news links found in <paramref name="source"/>.
/// </summary>
/// <remarks>
/// Every anchor matching the <c>newsshow.aspx?sn=...</c> pattern yields one feed item.
/// For each match, <c>parseEngine</c> is queried for "RSS" objects with the same url;
/// when none of them carries the current <c>cate</c> category, the article page is
/// downloaded, its content block is cut out and a new "RSS" object is created.
/// The feed item itself is written with an empty content argument in either case.
/// </remarks>
/// <param name="source">HTML of the news listing page.</param>
/// <returns>The text produced by the RSS generator.</returns>
string parse(string source)
{
    StringWriter stringWriter = new StringWriter();
    gen = new RSSFeedGenerator(stringWriter);

    // Header information for the RSS feed.
    gen.Title = cate;
    gen.Description = "";
    gen.LastBuildDate = DateTime.Now;
    gen.Link = "http://sbhhbs.com";
    //gen.Category = "The optional category";
    gen.PubDate = DateTime.Now;

    // Write the document header, then open the channel that holds the items.
    gen.WriteStartDocument();
    gen.WriteStartChannel();

    // Groups: 1 = numeric news id, 2 = title text, 3 = text between the parentheses
    // that follow the link (parsed as the item's date below).
    Regex qariRegex = new Regex("<a href='newsshow\\.aspx\\?sn=([0-9]*)'>.([^<]*)<[^(]*\\(([^)]*)\\)");

    foreach (Match match in qariRegex.Matches(source))
    {
        string newsid = match.Groups[1].Value;
        string title = match.Groups[2].Value;

        // Parse the date once; it is needed both for the stored object and the feed item.
        DateTime published = DateTime.Parse(match.Groups[3].Value);

        // An item counts as already stored only if an object with this url
        // also belongs to the current category.
        bool alreadyStored = false;
        var objList = parseEngine.GetObjectsWithQuery("RSS", new { url = newsid });
        if (objList != null)
        {
            foreach (var obj in objList)
            {
                if (cate.Equals(obj["category"]))
                {
                    alreadyStored = true;
                    break;
                }
            }
        }

        if (alreadyStored)
        {
            System.Console.WriteLine("Old item:" + title);
        }
        else
        {
            string downloadString;
            // WebClient is IDisposable; release it as soon as the page has been fetched.
            using (WebClient client = new WebClient())
            {
                client.Encoding = Encoding.UTF8;
                String subURL = "http://www.tongji-uni.com/newsshow.aspx?sn=" + newsid;
                downloadString = client.DownloadString(subURL);
            }

            // NOTE(review): rangeOfStr's result for a missing marker is not visible here;
            // if the page layout changes, the Substring call below may throw - confirm.
            NSRange start = rangeOfStr(downloadString, "<table cellpadding=\"0\" cellspacing=\"2\" border=\"0\" width=\"100%\" class=\"wh\">");
            NSRange end = rangeOfStr(downloadString, "<P><div align=right><a href=\"default.aspx\">【返回】</a></div></P>");

            // The cut ends before the closing tags, so re-append them.
            string content = downloadString.Substring(start.location, end.location - start.location) + "</td></tr></table>";

            var testObject = new Parse.ParseObject("RSS");
            testObject["category"] = cate;
            testObject["url"] = newsid;
            testObject["title"] = title;
            testObject["newsTime"] = String.Format("{0:yyyy-MM-dd HH:mm:ss}", published);
            testObject["content"] = saveTextToParse(content);

            // Create a new object.
            testObject = parseEngine.CreateObject(testObject);
            System.Console.WriteLine("New item:" + title);
        }

        gen.WriteItem(
            title,
            newsid,
            "",
            "sbhhbs",
            "",
            "",
            "",
            published,
            "",
            "",
            "",
            "");
    }

    // End the channel.
    gen.WriteEndChannel();

    try
    {
        // End the document.
        gen.WriteEndDocument();
    }
    catch (System.Exception ex)
    {
        // Best effort: still return whatever was generated, but do not hide the failure.
        System.Console.WriteLine("Failed to end RSS document: " + ex.Message);
    }
    finally
    {
        // Close the generator before reading the writer's text.
        gen.Close();
    }

    return stringWriter.ToString();
}
/// <summary>
/// Builds an RSS document from the news links found in <paramref name="source"/>.
/// </summary>
/// <remarks>
/// <paramref name="source"/> is first passed through <c>convertToUTF8fromGB2312</c>.
/// Every anchor matching the <c>news_detail.asp?id=...</c> pattern yields one feed item.
/// The detail page is downloaded for every match, because the publication time is
/// read from that page. A new "RSS" object is created through <c>parseEngine</c> only
/// when no existing object with the same url carries the current <c>cate</c> category.
/// The feed item itself is written with an empty content argument in either case.
/// </remarks>
/// <param name="source">HTML of the news listing page.</param>
/// <returns>The text produced by the RSS generator.</returns>
string parse(string source)
{
    source = convertToUTF8fromGB2312(source);

    StringWriter stringWriter = new StringWriter();
    gen = new RSSFeedGenerator(stringWriter);

    // Header information for the RSS feed.
    gen.Title = cate;
    gen.Description = "";
    gen.LastBuildDate = DateTime.Now;
    gen.Link = "http://sbhhbs.com";
    //gen.Category = "The optional category";
    gen.PubDate = DateTime.Now;

    // Write the document header, then open the channel that holds the items.
    gen.WriteStartDocument();
    gen.WriteStartChannel();

    // Groups: 1 = numeric news id, 2 = title text.
    Regex qariRegex = new Regex("<a href=\"news_detail\\.asp\\?id=([0-9]*)\">([^<]*)");

    foreach (Match match in qariRegex.Matches(source))
    {
        string newsid = match.Groups[1].Value;
        string title = match.Groups[2].Value;

        string downloadString;
        // WebClient is IDisposable; release it as soon as the page has been fetched.
        using (WebClient client = new WebClient())
        {
            client.Encoding = Encoding.Default;
            String subURL = "http://old.tongji-caup.org/student/news_detail.asp?id=" + newsid;
            downloadString = client.DownloadString(subURL);
        }
        downloadString = convertToUTF8fromGB2312(downloadString);

        // The timestamp sits between the two labels. The length is derived from the
        // start marker's own length rather than a hard-coded 5.
        // NOTE(review): rangeOfStr's result for a missing marker is not visible here;
        // if the page layout changes, the Substring calls below may throw - confirm.
        NSRange start = rangeOfStr(downloadString, "发布时间:");
        NSRange end = rangeOfStr(downloadString, " 信息来源:");
        int timeStart = start.location + start.length;
        string time = downloadString.Substring(timeStart, end.location - timeStart);

        // Parse once; the value is used for both the stored object and the feed item.
        DateTime published = DateTime.Parse(time);

        start = rangeOfStr(downloadString, "<td class=\"TLE\">");
        end = rangeOfStr(downloadString, "<!--内容-->");

        // The cut ends before the cell's closing tag, so re-append it.
        string content = downloadString.Substring(start.location, end.location - start.location) + "</td>";

        // An item counts as already stored only if an object with this url
        // also belongs to the current category.
        bool alreadyStored = false;
        var objList = parseEngine.GetObjectsWithQuery("RSS", new { url = newsid });
        if (objList != null)
        {
            foreach (var obj in objList)
            {
                if (cate.Equals(obj["category"]))
                {
                    alreadyStored = true;
                    break;
                }
            }
        }

        if (alreadyStored)
        {
            System.Console.WriteLine("Old item:" + title);
        }
        else
        {
            var testObject = new Parse.ParseObject("RSS");
            testObject["category"] = cate;
            testObject["url"] = newsid;
            testObject["title"] = title;
            testObject["newsTime"] = String.Format("{0:yyyy-MM-dd HH:mm:ss}", published);
            testObject["content"] = saveTextToParse(content);

            // Create a new object.
            testObject = parseEngine.CreateObject(testObject);
            System.Console.WriteLine("New item:" + title);
        }

        gen.WriteItem(
            title,
            newsid,
            "",
            "sbhhbs",
            "",
            "",
            "",
            published,
            "",
            "",
            "",
            "");
    }

    // End the channel.
    gen.WriteEndChannel();

    try
    {
        // End the document.
        gen.WriteEndDocument();
    }
    catch (System.Exception ex)
    {
        // Best effort: still return whatever was generated, but do not hide the failure.
        System.Console.WriteLine("Failed to end RSS document: " + ex.Message);
    }
    finally
    {
        // Close the generator before reading the writer's text.
        gen.Close();
    }

    return stringWriter.ToString();
}
/// <summary>
/// Builds an RSS document from the news links found in <paramref name="source"/>.
/// </summary>
/// <remarks>
/// <paramref name="source"/> is first passed through <c>convertToUTF8fromGB2312</c>.
/// Every anchor matching the <c>news_detail.asp?id=...</c> pattern yields one feed item.
/// For each match the detail page is downloaded, and the publication time and the
/// content block cut from it are passed to <c>gen.WriteItem</c>.
/// </remarks>
/// <param name="source">HTML of the news listing page.</param>
/// <returns>The text produced by the RSS generator.</returns>
string parse(string source)
{
    source = convertToUTF8fromGB2312(source);

    StringWriter stringWriter = new StringWriter();
    gen = new RSSFeedGenerator(stringWriter);

    // Header information for the RSS feed.
    gen.Title = "建筑与城市规划";
    gen.Description = "";
    gen.LastBuildDate = DateTime.Now;
    gen.Link = "http://sbhhbs.com";
    //gen.Category = "The optional category";
    gen.PubDate = DateTime.Now;

    // Write the document header, then open the channel that holds the items.
    gen.WriteStartDocument();
    gen.WriteStartChannel();

    // Groups: 1 = numeric news id, 2 = title text.
    Regex qariRegex = new Regex("<a href=\"news_detail\\.asp\\?id=([0-9]*)\">([^<]*)");

    foreach (Match match in qariRegex.Matches(source))
    {
        string id = match.Groups[1].Value;
        string title = match.Groups[2].Value;

        string downloadString;
        // WebClient is IDisposable; release it as soon as the page has been fetched.
        using (WebClient client = new WebClient())
        {
            client.Encoding = Encoding.Default;
            String subURL = "http://old.tongji-caup.org/student/news_detail.asp?id=" + id;
            downloadString = client.DownloadString(subURL);
        }
        downloadString = convertToUTF8fromGB2312(downloadString);

        // The timestamp sits between the two labels. The length is derived from the
        // start marker's own length rather than a hard-coded 5.
        // NOTE(review): rangeOfStr's result for a missing marker is not visible here;
        // if the page layout changes, the Substring calls below may throw - confirm.
        NSRange start = rangeOfStr(downloadString, "发布时间:");
        NSRange end = rangeOfStr(downloadString, " 信息来源:");
        int timeStart = start.location + start.length;
        string time = downloadString.Substring(timeStart, end.location - timeStart);

        start = rangeOfStr(downloadString, "<td class=\"TLE\">");
        end = rangeOfStr(downloadString, "<!--内容-->");

        // The cut ends before the cell's closing tag, so re-append it.
        string content = downloadString.Substring(start.location, end.location - start.location) + "</td>";

        gen.WriteItem(
            title,
            id,
            "",
            "sbhhbs",
            "",
            "",
            "",
            DateTime.Parse(time),
            content,
            "",
            "",
            "");
    }

    // End the channel.
    gen.WriteEndChannel();

    try
    {
        // End the document.
        gen.WriteEndDocument();
    }
    catch (System.Exception ex)
    {
        // Best effort: still return whatever was generated, but do not hide the failure.
        System.Console.WriteLine("Failed to end RSS document: " + ex.Message);
    }
    finally
    {
        // Close the generator before reading the writer's text.
        gen.Close();
    }

    return stringWriter.ToString();
}