Exemple #1
0
        /// <summary>
        /// POSTs an empty form to the <c>get_one_task</c> API endpoint and copies the
        /// returned task description into a new <see cref="RssSource"/>.
        /// </summary>
        /// <returns>
        /// An <see cref="RssSource"/> filled from the task array in the response. When the
        /// array holds several entries the last one wins; when it is empty the object is
        /// returned exactly as its constructor left it.
        /// </returns>
        public static RssSource GetOneTask()
        {
            string taskUrl = strApiUrl + "/index.php/api/index/get_one_task";

            RssSource rs       = new RssSource();
            Encoding  encoding = Encoding.GetEncoding("utf-8");

            IDictionary <string, string> parameters = new Dictionary <string, string>();

            // Dispose the response and reader deterministically so the underlying
            // connection is released even if reading throws.
            string result;
            using (HttpWebResponse response = HttpWebResponseUtility.CreatePostHttpResponse(taskUrl, parameters, null, null, encoding, null))
            using (StreamReader reader = new StreamReader(response.GetResponseStream(), encoding))
            {
                result = reader.ReadToEnd();
            }

            JObject jo = JObject.Parse(result);

            // The payload is addressed by position: the string value of the fourth
            // top-level property is re-parsed as a JSON array of tasks.
            string[] values = jo.Properties().Select(item => item.Value.ToString()).ToArray();
            JArray   ja     = (JArray)JsonConvert.DeserializeObject(values[3]);

            foreach (JToken jt in ja)
            {
                rs.strSiteName               = jt["site_name"].ToString();
                rs.strSiteCode               = jt["site_code"].ToString();
                rs.strSiteUrl                = jt["site_url"].ToString();
                rs.strArticleUrlPattern      = jt["article_url_pattern"].ToString();
                rs.strArticleUrlRangeCssPath = jt["article_url_range"].ToString();
            }

            return rs;
        }
Exemple #2
0
        /// <summary>
        /// POSTs an empty form to the <c>get_all_rules</c> API endpoint and stores every
        /// returned rule in <c>m_dicSiteRules</c>, keyed by the article URL pattern with the
        /// article content CSS path as the value.
        /// </summary>
        /// <remarks>
        /// A pattern that is already present is overwritten with the newly fetched CSS path,
        /// so the method can be called repeatedly to refresh the rules.
        /// </remarks>
        public static void GetAllRules()
        {
            string rulesUrl = strApiUrl + "/index.php/api/index/get_all_rules";

            Encoding encoding = Encoding.GetEncoding("utf-8");

            IDictionary <string, string> parameters = new Dictionary <string, string>();

            // Dispose the response and reader deterministically so the underlying
            // connection is released even if reading throws.
            string result;
            using (HttpWebResponse response = HttpWebResponseUtility.CreatePostHttpResponse(rulesUrl, parameters, null, null, encoding, null))
            using (StreamReader reader = new StreamReader(response.GetResponseStream(), encoding))
            {
                result = reader.ReadToEnd();
            }

            JObject jo = JObject.Parse(result);

            // The payload is addressed by position: the string value of the fourth
            // top-level property is re-parsed as a JSON array of rules.
            string[] values = jo.Properties().Select(item => item.Value.ToString()).ToArray();
            JArray   ja     = (JArray)JsonConvert.DeserializeObject(values[3]);

            foreach (JToken jt in ja)
            {
                string strArticle_url_pattern     = jt["article_url_pattern"].ToString();
                string strArticle_content_csspath = jt["article_content_csspath"].ToString();

                // Indexer assignment instead of Add: a duplicate pattern (in the payload or
                // left over from an earlier call) replaces the old entry instead of throwing.
                m_dicSiteRules[strArticle_url_pattern] = strArticle_content_csspath;
            }
        }
Exemple #3
0
        /// <summary>
        /// POSTs two offsets to the <c>is_link_exist</c> API endpoint and returns the integer
        /// found in the second top-level property of the JSON response.
        /// </summary>
        /// <param name="nOffset1">Value sent as the <c>offset1</c> form field.</param>
        /// <param name="nOffset2">Value sent as the <c>offset2</c> form field.</param>
        /// <returns>The second top-level property of the response, parsed as an integer.</returns>
        /// <exception cref="FormatException">The second property's value is not an integer.</exception>
        public static int IsLinkExist(int nOffset1, int nOffset2)
        {
            string checkUrl = strApiUrl + "/index.php/api/index/is_link_exist";

            Encoding encoding = Encoding.GetEncoding("utf-8");

            // Wire values are formatted and parsed with the invariant culture so the
            // request and result do not depend on the machine's regional settings.
            System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

            IDictionary <string, string> parameters = new Dictionary <string, string>();

            parameters.Add("offset1", nOffset1.ToString(invariant));
            parameters.Add("offset2", nOffset2.ToString(invariant));

            // Dispose the response and reader deterministically so the underlying
            // connection is released even if reading throws.
            string result;
            using (HttpWebResponse response = HttpWebResponseUtility.CreatePostHttpResponse(checkUrl, parameters, null, null, encoding, null))
            using (StreamReader reader = new StreamReader(response.GetResponseStream(), encoding))
            {
                result = reader.ReadToEnd();
            }

            JObject jo = JObject.Parse(result);

            // The answer is addressed by position: the second top-level property.
            string[] values = jo.Properties().Select(item => item.Value.ToString()).ToArray();

            return int.Parse(values[1], invariant);
        }
Exemple #4
0
        /// <summary>
        /// Timer callback: fetches one task, downloads the content at the task's site URL
        /// as UTF-8, hands both to <c>GetSiteLinks</c>, and writes a trace line to the console.
        /// </summary>
        /// <param name="sender">Event source; not used.</param>
        /// <param name="e">Elapsed-event data; not used.</param>
        public static void timerGetLinks_Elapsed(object sender, System.Timers.ElapsedEventArgs e)
        {
            RssSource     task         = GetOneTask();
            WebDownloader downloader   = new WebDownloader();
            Encoding      pageEncoding = Encoding.GetEncoding("UTF-8");

            // Download the task's site page and extract its links in one step.
            GetSiteLinks(task, downloader.GetPageByHttpWebRequest(task.strSiteUrl, pageEncoding, ""));

            Console.WriteLine("timerGetLinks_Elapsed");
        }
Exemple #5
0
        /// <summary>
        /// POSTs the given text as the <c>article_content</c> form field to the
        /// <c>add_article</c> API endpoint. The response body is not read.
        /// </summary>
        /// <param name="strContent">Article content to submit; must not be null.</param>
        /// <exception cref="ArgumentNullException"><paramref name="strContent"/> is null.</exception>
        public static void AddArticle(string strContent)
        {
            if (strContent == null)
            {
                throw new ArgumentNullException("strContent");
            }

            string addUrl = strApiUrl + "/index.php/api/index/add_article";

            Encoding encoding = Encoding.GetEncoding("utf-8");

            IDictionary <string, string> parameters = new Dictionary <string, string>();

            parameters.Add("article_content", strContent);

            // The reply is ignored, but the response must still be disposed so the
            // underlying connection is released.
            using (HttpWebResponse response = HttpWebResponseUtility.CreatePostHttpResponse(addUrl, parameters, null, null, encoding, null))
            {
            }
        }
Exemple #6
0
        /// <summary>
        /// Extracts the links from a downloaded page, keeps those whose normalized URL
        /// matches the source's article URL pattern, and saves each one together with its
        /// link text via <c>SaveUrlToDB</c>, echoing the URL to the console.
        /// </summary>
        /// <param name="rs">
        /// Source description. Supplies the URL pattern (<c>strArticleUrlPattern</c>), an
        /// optional CSS selector restricting where links are collected
        /// (<c>strArticleUrlRangeCssPath</c>), and the site URL used as the base for
        /// normalizing links (<c>strSiteUrl</c>).
        /// </param>
        /// <param name="strContent">Markup of the page to scan.</param>
        private static void GetSiteLinks(RssSource rs, string strContent)
        {
            // Turn the wildcard pattern into a regular expression: dots become literal
            // and '*' becomes a lazy "match anything". Other characters pass through as-is.
            string strUrlRule = rs.strArticleUrlPattern;

            strUrlRule = strUrlRule.Replace(".", "\\.");
            strUrlRule = strUrlRule.Replace("*", ".*?");

            HtmlAgilityPack.HtmlDocument htmlDoc = GetHtmlDocument(strContent);

            // A null selector is treated like an empty one: no narrowing is applied.
            if (!string.IsNullOrEmpty(rs.strArticleUrlRangeCssPath))
            {
                // Run the selector once and take the first hit, if any.
                HtmlNode rangeNode = htmlDoc.DocumentNode.QuerySelectorAll(rs.strArticleUrlRangeCssPath).FirstOrDefault();
                if (rangeNode != null)
                {
                    // Narrow the search scope to the first matching node's inner markup.
                    htmlDoc = GetHtmlDocument(rangeNode.InnerHtml);
                }
            }

            string            baseUrl = GetUrlLeftPart(rs.strSiteUrl);
            DocumentWithLinks links   = htmlDoc.GetLinks();

            foreach (string link in links.Links.Union(links.References))
            {
                if (string.IsNullOrEmpty(link))
                {
                    continue;
                }

                string normalizedLink = GetNormalizedLink(baseUrl, link);

                if (string.IsNullOrEmpty(normalizedLink))
                {
                    continue;
                }

                if (!Regex.IsMatch(normalizedLink, strUrlRule, RegexOptions.Singleline))
                {
                    continue;
                }

                string strLinkText = "";

                // First choice: the text of the first recorded link that contains the
                // normalized URL as a substring.
                foreach (string strTemp in links.m_dicLink2Text.Keys)
                {
                    if (strTemp.Contains(normalizedLink))
                    {
                        strLinkText = links.m_dicLink2Text[strTemp];
                        break;
                    }
                }

                // Fallback: look the raw link up directly, then its lower-cased form.
                // When both are present the lower-cased entry wins.
                if (strLinkText == "")
                {
                    string candidate;
                    if (links.m_dicLink2Text.TryGetValue(link, out candidate))
                    {
                        strLinkText = candidate.Trim();
                    }
                    if (links.m_dicLink2Text.TryGetValue(link.ToLower(), out candidate))
                    {
                        strLinkText = candidate.Trim();
                    }
                }

                SaveUrlToDB(normalizedLink, strLinkText);
                Console.WriteLine(normalizedLink);
            }
        }