/// <summary>
/// Requests one task from the API endpoint <c>/index.php/api/index/get_one_task</c>
/// (via <c>HttpWebResponseUtility.CreatePostHttpResponse</c>, with no parameters)
/// and copies its fields into an <see cref="RssSource"/>.
/// </summary>
/// <returns>
/// An <see cref="RssSource"/> filled from the JSON array found in the fourth
/// top-level property of the response. If the array holds several entries the
/// last one wins; if it is empty the returned instance keeps its default values.
/// </returns>
public static RssSource GetOneTask()
{
    string loginUrl = strApiUrl + "/index.php/api/index/get_one_task";
    RssSource rs = new RssSource();
    Encoding encoding = Encoding.GetEncoding("utf-8");
    IDictionary<string, string> parameters = new Dictionary<string, string>();

    // Dispose the response and reader so the underlying connection is released
    // even when reading fails.
    string result;
    using (HttpWebResponse response = HttpWebResponseUtility.CreatePostHttpResponse(loginUrl, parameters, null, null, encoding, null))
    using (StreamReader reader = new StreamReader(response.GetResponseStream(), encoding))
    {
        result = reader.ReadToEnd();
    }

    JObject jo = JObject.Parse(result);
    string[] values = jo.Properties().Select(item => item.Value.ToString()).ToArray();

    // The task list is located by position (fourth top-level property), not by name.
    JArray ja = (JArray)JsonConvert.DeserializeObject(values[3]);
    foreach (JToken jt in ja)
    {
        rs.strSiteName = jt["site_name"].ToString();
        rs.strSiteCode = jt["site_code"].ToString();
        rs.strSiteUrl = jt["site_url"].ToString();
        rs.strArticleUrlPattern = jt["article_url_pattern"].ToString();
        rs.strArticleUrlRangeCssPath = jt["article_url_range"].ToString();
    }

    return rs;
}
/// <summary>
/// Requests the rule list from the API endpoint
/// <c>/index.php/api/index/get_all_rules</c> and stores each rule in
/// <c>m_dicSiteRules</c>, keyed by <c>article_url_pattern</c> with
/// <c>article_content_csspath</c> as the value.
/// </summary>
/// <remarks>
/// Entries are written with the indexer, so a pattern that is already present
/// (a duplicate in the response, or a repeated call) is overwritten rather than
/// raising a duplicate-key exception.
/// </remarks>
public static void GetAllRules()
{
    string loginUrl = strApiUrl + "/index.php/api/index/get_all_rules";
    Encoding encoding = Encoding.GetEncoding("utf-8");
    IDictionary<string, string> parameters = new Dictionary<string, string>();

    // Dispose the response and reader so the underlying connection is released
    // even when reading fails.
    string result;
    using (HttpWebResponse response = HttpWebResponseUtility.CreatePostHttpResponse(loginUrl, parameters, null, null, encoding, null))
    using (StreamReader reader = new StreamReader(response.GetResponseStream(), encoding))
    {
        result = reader.ReadToEnd();
    }

    JObject jo = JObject.Parse(result);
    string[] values = jo.Properties().Select(item => item.Value.ToString()).ToArray();

    // The rule list is located by position (fourth top-level property), not by name.
    JArray ja = (JArray)JsonConvert.DeserializeObject(values[3]);
    foreach (JToken jt in ja)
    {
        string strArticle_url_pattern = jt["article_url_pattern"].ToString();
        string strArticle_content_csspath = jt["article_content_csspath"].ToString();
        m_dicSiteRules[strArticle_url_pattern] = strArticle_content_csspath;
    }
}
/// <summary>
/// Calls the API endpoint <c>/index.php/api/index/is_link_exist</c> with the two
/// offsets and returns the integer carried in the response.
/// </summary>
/// <param name="nOffset1">Value sent as the <c>offset1</c> form parameter.</param>
/// <param name="nOffset2">Value sent as the <c>offset2</c> form parameter.</param>
/// <returns>
/// The second top-level property of the JSON response, parsed as an integer.
/// </returns>
public static int IsLinkExist(int nOffset1, int nOffset2)
{
    string loginUrl = strApiUrl + "/index.php/api/index/is_link_exist";
    Encoding encoding = Encoding.GetEncoding("utf-8");
    IDictionary<string, string> parameters = new Dictionary<string, string>();
    parameters.Add("offset1", nOffset1.ToString());
    parameters.Add("offset2", nOffset2.ToString());

    // Dispose the response and reader so the underlying connection is released
    // even when reading fails.
    string result;
    using (HttpWebResponse response = HttpWebResponseUtility.CreatePostHttpResponse(loginUrl, parameters, null, null, encoding, null))
    using (StreamReader reader = new StreamReader(response.GetResponseStream(), encoding))
    {
        result = reader.ReadToEnd();
    }

    JObject jo = JObject.Parse(result);
    string[] values = jo.Properties().Select(item => item.Value.ToString()).ToArray();

    // The answer is located by position (second top-level property), not by name.
    return int.Parse(values[1]);
}
/// <summary>
/// Timer callback: fetches one task with <c>GetOneTask</c>, downloads the page at
/// the task's site URL as UTF-8, passes the page to <c>GetSiteLinks</c>, and then
/// writes a marker line to the console.
/// </summary>
/// <param name="sender">Event source; not used.</param>
/// <param name="e">Elapsed event data; not used.</param>
public static void timerGetLinks_Elapsed(object sender, System.Timers.ElapsedEventArgs e)
{
    RssSource task = GetOneTask();
    WebDownloader downloader = new WebDownloader();

    string pageHtml = downloader.GetPageByHttpWebRequest(
        task.strSiteUrl,
        Encoding.GetEncoding("UTF-8"),
        "");

    GetSiteLinks(task, pageHtml);
    Console.WriteLine("timerGetLinks_Elapsed");
}
/// <summary>
/// Sends article content to the API endpoint
/// <c>/index.php/api/index/add_article</c> as the <c>article_content</c> form
/// parameter. The response body is not inspected.
/// </summary>
/// <param name="strContent">Article content to submit.</param>
public static void AddArticle(string strContent)
{
    string loginUrl = strApiUrl + "/index.php/api/index/add_article";
    Encoding encoding = Encoding.GetEncoding("utf-8");
    IDictionary<string, string> parameters = new Dictionary<string, string>();
    parameters.Add("article_content", strContent.ToString());

    // The reply is ignored, but the response must still be disposed so the
    // connection is released instead of lingering until finalization.
    using (HttpWebResponse response = HttpWebResponseUtility.CreatePostHttpResponse(loginUrl, parameters, null, null, encoding, null))
    {
    }
}
/// <summary>
/// Collects the links found in a downloaded page, keeps those whose normalized
/// form matches the source's article URL pattern, and passes each match together
/// with its link text to <c>SaveUrlToDB</c>.
/// </summary>
/// <param name="rs">
/// Source description; supplies the URL pattern, the optional CSS path used to
/// restrict the search area, and the site URL used as the base for normalization.
/// </param>
/// <param name="strContent">HTML of the page to scan.</param>
private static void GetSiteLinks(RssSource rs, string strContent)
{
    // Convert the wildcard pattern into a regex: dots become literal and '*'
    // becomes a lazy "match anything". Other regex metacharacters pass through
    // unchanged.
    string strUrlRule = rs.strArticleUrlPattern;
    strUrlRule = strUrlRule.Replace(".", "\\.");
    strUrlRule = strUrlRule.Replace("*", ".*?");

    HtmlAgilityPack.HtmlDocument htmlDoc = GetHtmlDocument(strContent);
    if (!string.IsNullOrEmpty(rs.strArticleUrlRangeCssPath))
    {
        // Take the first match only; FirstOrDefault runs the selector query once.
        HtmlNode rangeNode = htmlDoc.DocumentNode.QuerySelectorAll(rs.strArticleUrlRangeCssPath).FirstOrDefault();
        if (rangeNode != null)
        {
            // Narrow the search area further to the selected node's inner HTML.
            htmlDoc = GetHtmlDocument(rangeNode.InnerHtml);
        }
    }

    string baseUrl = GetUrlLeftPart(rs.strSiteUrl);
    DocumentWithLinks links = htmlDoc.GetLinks();

    foreach (string link in links.Links.Union(links.References))
    {
        if (string.IsNullOrEmpty(link))
        {
            continue;
        }

        string normalizedLink = GetNormalizedLink(baseUrl, link);
        if (string.IsNullOrEmpty(normalizedLink))
        {
            continue;
        }

        // Only existence of a match matters, so IsMatch is enough.
        if (!Regex.IsMatch(normalizedLink, strUrlRule, RegexOptions.Singleline))
        {
            continue;
        }

        // First choice: text of the first recorded link whose key contains the
        // normalized URL.
        string strLinkText = "";
        foreach (string strTemp in links.m_dicLink2Text.Keys)
        {
            if (strTemp.Contains(normalizedLink))
            {
                strLinkText = links.m_dicLink2Text[strTemp];
                break;
            }
        }

        // Fallback: look the raw link up directly, then its lower-cased form
        // (the lower-cased entry wins when both exist).
        if (strLinkText == "")
        {
            if (links.m_dicLink2Text.Keys.Contains(link))
            {
                strLinkText = links.m_dicLink2Text[link].Trim();
            }

            string lowerLink = link.ToLower();
            if (links.m_dicLink2Text.Keys.Contains(lowerLink))
            {
                strLinkText = links.m_dicLink2Text[lowerLink].Trim();
            }
        }

        SaveUrlToDB(normalizedLink, strLinkText);
        Console.WriteLine(normalizedLink);
    }
}