Пример #1
0
        /// <summary>
        /// Builds an RSS feed document from a news listing page and returns it as a string.
        /// </summary>
        /// <remarks>
        /// Every <c>newsshow.aspx?sn=...</c> link matched in <paramref name="source"/> becomes one
        /// feed item. When no stored "RSS" object with the same url exists for the current category,
        /// the detail page is downloaded, its content block is cut out and a new "RSS" object is
        /// created through <c>parseEngine</c> before the item is written to the feed.
        /// </remarks>
        /// <param name="source">HTML of the news listing page.</param>
        /// <returns>The text accumulated in the writer that backs the feed generator.</returns>
        /// <exception cref="System.FormatException">The date text captured for an item cannot be parsed.</exception>
        /// <exception cref="System.Net.WebException">Downloading a detail page failed.</exception>
        string parse(string source)
        {
            StringWriter stringWriter = new StringWriter();
            string result;
            gen = new RSSFeedGenerator(stringWriter);

            //-------------------------------------------------
            // header information for the RSS feed
            //-------------------------------------------------
            // Take the timestamp once so both header dates are identical.
            DateTime now = DateTime.Now;
            gen.Title = cate;
            gen.Description = "";
            gen.LastBuildDate = now;
            gen.Link = "http://sbhhbs.com";
            gen.PubDate = now;

            // write the header of the RSS feed document
            gen.WriteStartDocument();

            // write the start channel for the RSS feed items
            gen.WriteStartChannel();

            // Groups: 1 = news id, 2 = title, 3 = text between the parentheses (used as the date).
            Regex qariRegex = new Regex("<a href='newsshow\\.aspx\\?sn=([0-9]*)'>.([^<]*)<[^(]*\\(([^)]*)\\)");
            MatchCollection mc = qariRegex.Matches(source);
            foreach (Match match in mc)
            {
                string newsid = match.Groups[1].Value;
                string title = match.Groups[2].Value;
                // Parse once; the value is needed for the stored record and for the feed item.
                DateTime published = DateTime.Parse(match.Groups[3].Value);

                // Look for an already stored object with this url in the current category.
                var objList = parseEngine.GetObjectsWithQuery("RSS", new { url = newsid });
                bool alreadyStored = false;
                if (objList != null && objList.Length > 0)
                {
                    foreach (var obj in objList)
                    {
                        if (cate.Equals(obj["category"]))
                        {
                            alreadyStored = true;
                            break;
                        }
                    }
                    if (alreadyStored)
                    {
                        System.Console.WriteLine("Old item:" + title);
                    }
                }

                if (!alreadyStored)
                {
                    string downloadString;
                    // WebClient is IDisposable; release it as soon as the page has been fetched.
                    using (WebClient client = new WebClient())
                    {
                        client.Encoding = Encoding.UTF8;
                        String subURL = "http://www.tongji-uni.com/newsshow.aspx?sn=" + newsid;
                        downloadString = client.DownloadString(subURL);
                    }

                    // NOTE(review): rangeOfStr is defined elsewhere; if either marker is missing the
                    // Substring call below may throw - confirm how a miss is reported.
                    NSRange start = rangeOfStr(downloadString, "<table cellpadding=\"0\" cellspacing=\"2\" border=\"0\" width=\"100%\" class=\"wh\">");
                    NSRange end = rangeOfStr(downloadString, "<P><div align=right><a href=\"default.aspx\">【返回】</a></div></P>");
                    // The slice stops before the closing tags of the table, so re-append them.
                    string content = downloadString.Substring(start.location, end.location - start.location) + "</td></tr></table>";

                    var testObject = new Parse.ParseObject("RSS");
                    testObject["category"] = cate;
                    testObject["url"] = newsid;
                    testObject["title"] = title;
                    testObject["newsTime"] = String.Format("{0:yyyy-MM-dd HH:mm:ss}", published);
                    testObject["content"] = saveTextToParse(content);
                    // Create a new object
                    testObject = parseEngine.CreateObject(testObject);
                    System.Console.WriteLine("New item:" + title);
                }

                gen.WriteItem(
                               title,
                               newsid,
                               "",
                               "sbhhbs",
                               "",
                               "",
                               "",
                               published,
                               "",
                               "",
                               "",
                               "");
            }

            // end the channel
            gen.WriteEndChannel();
            try
            {
                // end the document
                gen.WriteEndDocument();
            }
            catch (System.Exception ex)
            {
                // Best effort: still return what was written, but do not hide the failure.
                System.Console.WriteLine("Failed to end RSS document: " + ex.Message);
            }
            finally
            {
                // close the rss generator and collect the generated text
                gen.Close();

                result = stringWriter.ToString();
            }
            return result;
        }
Пример #2
0
        /// <summary>
        /// Builds an RSS feed document from a news listing page and returns it as a string.
        /// </summary>
        /// <remarks>
        /// <paramref name="source"/> is first passed through <c>convertToUTF8fromGB2312</c>. Every
        /// <c>news_detail.asp?id=...</c> link matched in it becomes one feed item: the detail page is
        /// downloaded, the publication time and the content block are cut out of it, and a new "RSS"
        /// object is created through <c>parseEngine</c> unless one with the same url already exists
        /// for the current category.
        /// </remarks>
        /// <param name="source">HTML of the news listing page.</param>
        /// <returns>The text accumulated in the writer that backs the feed generator.</returns>
        /// <exception cref="System.FormatException">The time text cut from a detail page cannot be parsed.</exception>
        /// <exception cref="System.Net.WebException">Downloading a detail page failed.</exception>
        string parse(string source)
        {
            source = convertToUTF8fromGB2312(source);

            StringWriter stringWriter = new StringWriter();
            string result;
            gen = new RSSFeedGenerator(stringWriter);

            //-------------------------------------------------
            // header information for the RSS feed
            //-------------------------------------------------
            // Take the timestamp once so both header dates are identical.
            DateTime now = DateTime.Now;
            gen.Title = cate;
            gen.Description = "";
            gen.LastBuildDate = now;
            gen.Link = "http://sbhhbs.com";
            gen.PubDate = now;

            // write the header of the RSS feed document
            gen.WriteStartDocument();

            // write the start channel for the RSS feed items
            gen.WriteStartChannel();

            // Groups: 1 = news id, 2 = link text (used as the title).
            Regex qariRegex = new Regex("<a href=\"news_detail\\.asp\\?id=([0-9]*)\">([^<]*)");
            MatchCollection mc = qariRegex.Matches(source);
            foreach (Match match in mc)
            {
                string newsid = match.Groups[1].Value;
                string title = match.Groups[2].Value;

                string downloadString;
                // WebClient is IDisposable; release it as soon as the page has been fetched.
                using (WebClient client = new WebClient())
                {
                    client.Encoding = Encoding.Default;
                    String subURL = "http://old.tongji-caup.org/student/news_detail.asp?id=" + newsid;
                    downloadString = client.DownloadString(subURL);
                }
                downloadString = convertToUTF8fromGB2312(downloadString);

                // NOTE(review): rangeOfStr is defined elsewhere; if a marker is missing the
                // Substring calls below may throw - confirm how a miss is reported.
                NSRange start = rangeOfStr(downloadString, "发布时间:");
                NSRange end = rangeOfStr(downloadString, "&nbsp;&nbsp; 信息来源:");
                // Slice from the end of the first marker up to the second one. The marker's own
                // length replaces a literal 5 so the slice stays right if the marker text changes
                // (assumes rangeOfStr reports the matched marker's length - TODO confirm).
                int timeStart = start.location + start.length;
                // Parse once; the value is needed for the stored record and for the feed item.
                DateTime published = DateTime.Parse(downloadString.Substring(timeStart, end.location - timeStart));

                start = rangeOfStr(downloadString, "<td  class=\"TLE\">");
                end = rangeOfStr(downloadString, "<!--内容-->");
                // The slice stops before the cell's closing tag, so re-append it.
                string content = downloadString.Substring(start.location, end.location - start.location) + "</td>";

                // Look for an already stored object with this url in the current category.
                var objList = parseEngine.GetObjectsWithQuery("RSS", new { url = newsid });
                bool alreadyStored = false;
                if (objList != null && objList.Length > 0)
                {
                    foreach (var obj in objList)
                    {
                        if (cate.Equals(obj["category"]))
                        {
                            alreadyStored = true;
                            break;
                        }
                    }
                    if (alreadyStored)
                    {
                        System.Console.WriteLine("Old item:" + title);
                    }
                }

                if (!alreadyStored)
                {
                    var testObject = new Parse.ParseObject("RSS");
                    testObject["category"] = cate;
                    testObject["url"] = newsid;
                    testObject["title"] = title;
                    testObject["newsTime"] = String.Format("{0:yyyy-MM-dd HH:mm:ss}", published);
                    testObject["content"] = saveTextToParse(content);
                    // Create a new object
                    testObject = parseEngine.CreateObject(testObject);
                    System.Console.WriteLine("New item:" + title);
                }

                gen.WriteItem(
                               title,
                               newsid,
                               "",
                               "sbhhbs",
                               "",
                               "",
                               "",
                               published,
                               "",
                               "",
                               "",
                               "");
            }

            // end the channel
            gen.WriteEndChannel();
            try
            {
                // end the document
                gen.WriteEndDocument();
            }
            catch (System.Exception ex)
            {
                // Best effort: still return what was written, but do not hide the failure.
                System.Console.WriteLine("Failed to end RSS document: " + ex.Message);
            }
            finally
            {
                // close the rss generator and collect the generated text
                gen.Close();

                result = stringWriter.ToString();
            }
            return result;
        }
Пример #3
0
        /// <summary>
        /// Builds an RSS feed document from a news listing page and returns it as a string.
        /// </summary>
        /// <remarks>
        /// <paramref name="source"/> is first passed through <c>convertToUTF8fromGB2312</c>. Every
        /// <c>news_detail.asp?id=...</c> link matched in it becomes one feed item: the detail page is
        /// downloaded, and the publication time and the content block cut out of it are written to
        /// the feed together with the link text and id. The feed title is a fixed string.
        /// </remarks>
        /// <param name="source">HTML of the news listing page.</param>
        /// <returns>The text accumulated in the writer that backs the feed generator.</returns>
        /// <exception cref="System.FormatException">The time text cut from a detail page cannot be parsed.</exception>
        /// <exception cref="System.Net.WebException">Downloading a detail page failed.</exception>
        string parse(string source)
        {
            source = convertToUTF8fromGB2312(source);

            StringWriter stringWriter = new StringWriter();
            string result;
            gen = new RSSFeedGenerator(stringWriter);

            //-------------------------------------------------
            // header information for the RSS feed
            //-------------------------------------------------
            // Take the timestamp once so both header dates are identical.
            DateTime now = DateTime.Now;
            gen.Title = "建筑与城市规划";
            gen.Description = "";
            gen.LastBuildDate = now;
            gen.Link = "http://sbhhbs.com";
            gen.PubDate = now;

            // write the header of the RSS feed document
            gen.WriteStartDocument();

            // write the start channel for the RSS feed items
            gen.WriteStartChannel();

            // Groups: 1 = news id, 2 = link text (used as the title).
            Regex qariRegex = new Regex("<a href=\"news_detail\\.asp\\?id=([0-9]*)\">([^<]*)");
            MatchCollection mc = qariRegex.Matches(source);
            foreach (Match match in mc)
            {
                string id = match.Groups[1].Value;
                string title = match.Groups[2].Value;

                string downloadString;
                // WebClient is IDisposable; release it as soon as the page has been fetched.
                using (WebClient client = new WebClient())
                {
                    client.Encoding = Encoding.Default;
                    String subURL = "http://old.tongji-caup.org/student/news_detail.asp?id=" + id;
                    downloadString = client.DownloadString(subURL);
                }
                downloadString = convertToUTF8fromGB2312(downloadString);

                // NOTE(review): rangeOfStr is defined elsewhere; if a marker is missing the
                // Substring calls below may throw - confirm how a miss is reported.
                NSRange start = rangeOfStr(downloadString, "发布时间:");
                NSRange end = rangeOfStr(downloadString, "&nbsp;&nbsp; 信息来源:");
                // Slice from the end of the first marker up to the second one. The marker's own
                // length replaces a literal 5 so the slice stays right if the marker text changes
                // (assumes rangeOfStr reports the matched marker's length - TODO confirm).
                int timeStart = start.location + start.length;
                string time = downloadString.Substring(timeStart, end.location - timeStart);

                start = rangeOfStr(downloadString, "<td  class=\"TLE\">");
                end = rangeOfStr(downloadString, "<!--内容-->");
                // The slice stops before the cell's closing tag, so re-append it.
                string content = downloadString.Substring(start.location, end.location - start.location) + "</td>";

                gen.WriteItem(
                               title,
                               id,
                               "",
                               "sbhhbs",
                               "",
                               "",
                               "",
                               DateTime.Parse(time),
                               content,
                               "",
                               "",
                               "");
            }

            // end the channel
            gen.WriteEndChannel();
            try
            {
                // end the document
                gen.WriteEndDocument();
            }
            catch (System.Exception ex)
            {
                // Best effort: still return what was written, but do not hide the failure.
                System.Console.WriteLine("Failed to end RSS document: " + ex.Message);
            }
            finally
            {
                // close the rss generator and collect the generated text
                gen.Close();

                result = stringWriter.ToString();
            }
            return result;
        }