Beispiel #1
0
        /// <summary>
        /// Loads the listing page at <paramref name="link"/>, walks its <c>div.item</c> entries,
        /// downloads each entry's detail page and hands the extracted title, thumbnail and
        /// article HTML to <c>Insert</c>.
        /// </summary>
        /// <param name="userid">Passed through to <c>Insert</c> unchanged.</param>
        /// <param name="portalid">Passed through to <c>Insert</c> unchanged.</param>
        /// <param name="culturecode">Passed through to <c>Insert</c> unchanged.</param>
        /// <param name="code">Passed through to <c>Insert</c> unchanged.</param>
        /// <param name="link">URL of the listing page to scrape.</param>
        /// <param name="category">Not used by this method.</param>
        /// <param name="num_rows">
        /// Row limit. The counter starts at 1 and is compared with a strict "less than",
        /// so at most <c>num_rows - 1</c> entries are inserted.
        /// NOTE(review): this looks like a possible off-by-one; kept as-is so callers see the same counts.
        /// </param>
        /// <returns>
        /// The value returned by the last <c>Insert</c> call, or -1 when no entry was inserted
        /// (no matching items, limit too small, or every item was skipped).
        /// </returns>
        public int Get_VIETNAMNET(string userid, int portalid, string culturecode, string code, string link, string category, int num_rows)
        {
            HtmlWeb            hw     = new HtmlWeb();
            HtmlDocument       doc    = hw.Load(link);
            HtmlNodeCollection tables = doc.DocumentNode.SelectNodes("//div[@class='item']");

            int result = -1;

            // SelectNodes yields null (not an empty collection) when nothing matches.
            if (tables == null)
            {
                return result;
            }

            int i = 1;

            foreach (HtmlNode table in tables)
            {
                // Limit reached: no point in visiting the remaining items.
                if (i >= num_rows)
                {
                    break;
                }

                // The headline anchor carries both the detail URL and the title text;
                // without it the item cannot be imported, so skip it instead of crashing.
                HtmlNode anchor = table.SelectSingleNode(".//a[@class='item_link']");
                if (anchor == null)
                {
                    continue;
                }

                // First src="..." attribute found anywhere inside the item is used as the thumbnail.
                string img = Regex.Match(table.InnerHtml, "src=\"(?<img>.*?)\"").Groups["img"].ToString();

                // The href captured here is prefixed with the site root to form the detail URL.
                string linkdetail = Regex.Match(anchor.OuterHtml, " href=\"(?<link>.*?)\"").Groups["link"].ToString();
                linkdetail = "http://vietnamnet.vn" + linkdetail;

                // Single quotes are stripped from the headline before it is handed to Insert.
                string headline = anchor.InnerText.Replace("'", "");
                string title    = headline;

                // Fetch the article page; the same HtmlWeb instance is reused for every item.
                HtmlDocument doc2        = hw.Load(linkdetail);
                HtmlNode     contentnode = doc2.DocumentNode.SelectSingleNode("//div[@class='article_content']");
                if (contentnode == null)
                {
                    // Detail page has no article body; nothing worth inserting.
                    continue;
                }

                // The article's date and lead text are not extracted: Insert receives neither.
                string contents      = contentnode.InnerHtml.Trim();
                string status        = "-1";
                string source        = "vietnamnet";
                string main_img      = string.Empty;
                string abstract_info = "";
                string navigateurl   = "";

                result = Insert(userid, portalid, culturecode, code, title, headline, abstract_info, img, main_img, contents, source, navigateurl, status);
                i++;
            }

            return result;
        }