/// <summary>
/// Displays the fields of a processed news item on the form, increments the
/// two counters held in the <c>count</c> and <c>all</c> controls, and writes a
/// timestamped status line.
/// </summary>
/// <param name="no">Item to display. A <c>status</c> of 0 is reported as an insert, any other value as an update.</param>
public void SetText(NewsObject no)
{
    label9.Text = no.type_name;
    label2.Text = no.title;
    label4.Text = no.pubdate;
    label6.Text = no.source;

    // The counters live as text inside their controls: parse, bump, write back.
    int countValue = int.Parse(count.Text) + 1;
    count.Text = countValue.ToString();

    int allValue = int.Parse(all.Text) + 1;
    all.Text = allValue.ToString();

    string stamp = string.Format("{0: HH:mm:ss}", DateTime.Now);

    // The status line alternates between starting with a space and not,
    // so two consecutive messages never begin the same way.
    string lead = label13.Text.StartsWith(" ") ? "" : " ";

    // Marker shown only on odd values of the overall counter.
    label16.Text = (allValue % 2 == 0) ? "" : "√:";

    string outcome = (no.status == 0) ? "插入成功 --" : "成功更新 --";
    label13.Text = lead + outcome + stamp;
}
/// <summary>
/// Persists a news item in <c>data_xp_news</c>. When no row with the same
/// title exists a new row is inserted; otherwise content, source, misc and
/// pubdate of the existing row are overwritten.
/// </summary>
/// <param name="no">
/// Item to store. <c>title</c> and <c>content</c> must be non-null; a null
/// <c>source</c>, <c>misc</c> or <c>pubdate</c> is written as an empty string.
/// </param>
/// <returns>0 when a row was inserted, 1 when an existing row was updated.</returns>
private int InsertItem(NewsObject no)
{
    // Escape every text value exactly once, up front, and use the same
    // escaped values for the existence check, the INSERT and the UPDATE.
    // Escaping twice stores doubled quotes, so the row is never found again
    // by title and gets re-inserted on every run.
    // NOTE(review): only the raw-SQL-string form of MyClass.ExecuteNonQuery is
    // visible here; if a parameterized overload exists, prefer it.
    string title = no.title.Replace("'", "''");
    string content = no.content.Replace("'", "''");
    string source = (no.source ?? "").Replace("'", "''");
    string misc = (no.misc ?? "").Replace("'", "''");
    string pubdate = (no.pubdate ?? "").Replace("'", "''");

    // Written into the statement unquoted, i.e. as a numeric literal.
    string type_id = no.type_id;

    // Every '?' in the content becomes a space.
    // NOTE(review): presumably strips characters that failed to decode - confirm.
    content = content.Replace('?', ' ');

    // Reduce every anchor tag that carries attributes to a bare <a>.
    string hrefs = MyClass.ExtractStr(content, "href", "<", ">", 1000, true);
    foreach (string item in hrefs.Split(','))
    {
        if (item.StartsWith("a ") || item.StartsWith("A "))
        {
            content = content.Replace("<" + item + ">", "<a>");
        }
    }

    if (!MyClass.SqlExists("select * from data_xp_news where title='" + title + "'"))
    {
        string insertSql = string.Format(
            "INSERT INTO data_xp_news(title,type_id,content,source,misc,pubdate) VALUES('{0}',{1},'{2}','{3}','{4}','{5}')",
            title, type_id, content, source, misc, pubdate);
        MyClass.ExecuteNonQuery(insertSql);
        return 0;
    }

    string updateSql = string.Format(
        "update data_xp_news set content='{0}',source='{1}',misc='{2}',pubdate='{3}' where title='{4}'",
        content, source, misc, pubdate, title);
    MyClass.ExecuteNonQuery(updateSql);
    return 1;
}
/// <summary>
/// Downloads a news page and extracts its article body, publication time and
/// source into a <see cref="NewsObject"/>.
/// </summary>
/// <param name="title">Title to store on the result; it is not read from the page.</param>
/// <param name="url">Address of the article page.</param>
/// <param name="type">
/// Category name. "本地新闻" (local news) pages are read as UTF-8 with their
/// own markers; every other category is read as GB2312.
/// </param>
/// <returns>
/// The populated item. <c>content</c> is empty when the article container was
/// not found; <c>pubdate</c> falls back to the current time when no parsable
/// date was found.
/// </returns>
NewsObject GetNewsObject(string title, string url, string type)
{
    // The page is downloaded exactly once, with the encoding of its category.
    // (An additional download whose result was discarded has been removed.)
    string html;
    string body;
    string rawTime;
    string source;

    if (type == "本地新闻")
    {
        // Extraction rules for local-news pages.
        html = MyClass.GetUrltoHtml(url, "utf-8");
        body = MyClass.ExtractStr(html, "class=\"article\"", "<div", "div>", 1, true, ",");

        rawTime = (MyClass.ExtractStr(html, "class=\"info\"", ">", " 来", 1, false, ",") ?? "").Trim();

        // The source name is the text of the third tag after "来源:".
        // Date and source are handled independently so a page without that
        // markup still keeps its publication time.
        source = "";
        string rawSource = MyClass.ExtractStr(html, "class=\"info", "来源:", "</div>", 1, false, ",") ?? "";
        string[] byOpenTag = rawSource.Split('<');
        if (byOpenTag.Length > 2)
        {
            string[] byCloseTag = byOpenTag[2].Split('>');
            if (byCloseTag.Length > 1)
            {
                source = byCloseTag[1];
            }
        }
    }
    else
    {
        html = MyClass.GetUrltoHtml(url, "gb2312");
        body = MyClass.ExtractStr(html, "id=\"contentText\"", "<div", "div>", 1, true, ",");

        rawTime = MyClass.ExtractStr(html, "class=\"l\"", ">", "<", 1, false, ",");
        source = MyClass.ExtractStr(html, "id=\"media_span\"", ">", "<", 1, false, ",");

        // Fall back to the alternative markers when the primary ones yield nothing.
        if (source == "")
        {
            source = MyClass.ExtractStr(html, "itemprop=\"name\"", ">", "<", 1, false, ",");
        }
        if (rawTime == "")
        {
            rawTime = MyClass.ExtractStr(html, "class=\"time\"", ">", "<", 1, false, ",");
        }
    }

    // The extracted fragment is re-wrapped with the "<div" / "div>" delimiters.
    if (body != "")
    {
        body = "<div" + body + "div>";
    }

    // An unparsable or missing date falls back to "now".
    DateTime parsed;
    DateTime published = DateTime.TryParse(rawTime, out parsed) ? parsed : DateTime.Now;

    NewsObject no = new NewsObject();
    no.title = title;
    no.pubdate = MyClass.FormateTime(published);
    no.source = source;
    no.content = body;
    return no;
}
/// <summary>
/// Downloads a GB2312 news page and extracts the article body (with scripts
/// removed and relative image addresses made absolute), the publication time
/// and the source into a <see cref="NewsObject"/>.
/// </summary>
/// <param name="title">Title to store on the result; it is not read from the page.</param>
/// <param name="url">Address of the article page; also the base for relative image addresses.</param>
/// <param name="type">Category name. Not used by this implementation.</param>
/// <returns>
/// The populated item. <c>content</c> is empty when the article container was
/// not found; <c>pubdate</c> falls back to the current time when no parsable
/// date was found.
/// </returns>
NewsObject GetNewsObject(string title, string url, string type)
{
    // Single download. (An additional download without an explicit encoding,
    // whose result was immediately overwritten, has been removed.)
    string html = MyClass.GetUrltoHtml(url, "gb2312");

    string body = MyClass.ExtractStr(html, "", "<div id=\"tupian_div\"", "<div class=\"left_name\"", 1);
    if (body != "")
    {
        // Remove every <script>...</script> section from the body.
        string scripts = MyClass.ExtractStr(body, "", "<script", "</script>", 100, false, ",");
        if (scripts != "")
        {
            foreach (string script in scripts.Split(','))
            {
                if (script != "")
                {
                    body = body.Replace("<script" + script + "</script>", "");
                }
            }
        }

        if (body.Contains("<img"))
        {
            string imgTags = MyClass.ExtractStr(body, "<img ", "<img", ">", 100, true);
            if (imgTags != "")
            {
                // Replace() rewrites every occurrence at once, so each distinct
                // src must be processed only one time; a second pass over the
                // same value would prefix the already-absolute address again.
                System.Collections.Generic.List<string> rewritten = new System.Collections.Generic.List<string>();

                foreach (string imgTag in imgTags.Split(','))
                {
                    string src = MyClass.ExtractStr(imgTag, "src", "\"", "\"");
                    if (src == "" || src.StartsWith("http") || rewritten.Contains(src))
                    {
                        continue;
                    }

                    string absolute = null;
                    if (src.StartsWith("/"))
                    {
                        // Host-relative address.
                        absolute = "http://" + new Uri(url).Host + src;
                    }
                    else
                    {
                        // Address relative to the directory of the page.
                        int lastSlash = url.LastIndexOf("/");
                        if (lastSlash != -1)
                        {
                            absolute = url.Remove(lastSlash + 1) + src;
                        }
                    }

                    if (absolute != null)
                    {
                        body = body.Replace(src, absolute);
                        rewritten.Add(src);
                    }
                }
            }
        }

        // Put back the opening marker used as the start delimiter above, then
        // append the "left_name" block that follows the article.
        body = "<div id=\"tupian_div\"" + body;
        string nameBlock = MyClass.ExtractStr(html, "class=\"left_name\"", "<div", "div>", 1, true);
        body = body + "<div" + nameBlock + "div>";
    }

    // The "left-t" element holds "<date> <source>" separated by one space.
    string[] timeAndSource = MyClass.ExtractStr(html, "\"left-t\"", ">", "<").Trim().Split(' ');

    string source = "";
    if (timeAndSource.Length == 2)
    {
        source = timeAndSource[1].Replace("来源:", "");
    }

    // Split always yields at least one element. Turn "yyyy年M月d日" into "yyyy-M-d".
    string rawTime = timeAndSource[0].Replace("年", "-").Replace("月", "-").Replace("日", "");

    // An unparsable or missing date falls back to "now".
    DateTime parsed;
    DateTime published = DateTime.TryParse(rawTime, out parsed) ? parsed : DateTime.Now;

    NewsObject no = new NewsObject();
    no.title = title;
    no.pubdate = MyClass.FormateTime(published);
    no.source = source;
    no.content = body;
    return no;
}
/// <summary>
/// Walks every category returned by <c>GetBigTypes</c>, fetches the titles of
/// each category, downloads and stores the articles, and keeps the progress
/// controls up to date.
/// </summary>
/// <remarks>
/// The whole run stops when a category has no row in <c>data_xp_newstype</c>,
/// when <c>GetTitles</c> returns null, or when <c>newisContinue</c> is false.
/// An article already present in <c>data_xp_news</c> is downloaded again only
/// when <c>checkBox1</c> is unchecked.
/// NOTE(review): SetText is marshalled through label1.Invoke while the other
/// controls are written directly - confirm which thread runs this method.
/// </remarks>
void GetNews()
{
    Dictionary<string, string> bg = GetBigTypes();

    // Ten progress steps per category, plus the initial step performed here.
    progressBar1.Maximum = (bg.Count) * 10 + 1;
    progressBar1.PerformStep();

    int index = 0;
    foreach (KeyValuePair<string, string> item in bg)
    {
        index++;
        label15.Text = index.ToString() + "/" + bg.Count.ToString();

        // Quotes are doubled so a name containing an apostrophe cannot break the statement.
        object type = MyClass.ExecuteScalar(string.Format(
            "SELECT id from data_xp_newstype where typename='{0}'",
            item.Key.Replace("'", "''")));
        if (type == null)
        {
            return;
        }
        string typeid = type.ToString();

        Dictionary<string, string> sm = GetTitles(item.Value);
        if (sm == null)
        {
            return;
        }
        totle.Text = sm.Count.ToString();
        count.Text = "0";

        int j = 0;
        foreach (KeyValuePair<string, string> item1 in sm)
        {
            if (!newisContinue)
            {
                return;
            }
            j++;

            // Look the article up by title, escaped the same way InsertItem
            // escapes it when storing.
            string sql = "select * from data_xp_news where title='" + item1.Key.Replace("'", "''") + "'";
            object[] neos = MyClass.GetObjects(sql, typeof(NewsObject));
            bool isget = neos != null && neos.Length != 0;

            NewsObject no;
            if (isget && checkBox1.Checked)
            {
                // Already stored and re-download is disabled: show the stored row.
                no = (NewsObject)neos[0];
                no.type_id = typeid;
                no.type_name = item.Key;
            }
            else
            {
                no = GetNewsObject(item1.Key, item1.Value, item.Key);
                no.type_id = typeid;
                no.type_name = item.Key;

                // An empty body means extraction failed; nothing is stored then.
                if (no.content != "")
                {
                    no.status = InsertItem(no);
                }
            }

            label1.Invoke(new SetTextDelegate(SetText), no);

            // Completed steps out of bg.Count * 10. The percentage is derived
            // from the same step count as the bar so both agree for any
            // number of categories, not only for exactly ten.
            int steps = 10 * (index - 1) + j * 10 / sm.Count;
            progressBar1.Value = steps + 1;
            label14.Text = (steps * 10 / bg.Count).ToString() + "%";
        }
    }
}