/// <summary>
/// Builds a new list containing a field-by-field copy of every entry in <c>all_news</c>.
/// </summary>
/// <returns>
/// A freshly allocated list; each element is a newly created <c>object_news</c>
/// whose id, type, source, title, content and image were copied from the stored entry.
/// </returns>
public List<object_news> getList()
{
    var copies = new List<object_news>();

    foreach (var item in all_news)
    {
        // A new instance is created per entry instead of handing out the stored one.
        copies.Add(new object_news
        {
            id = item.id,
            type = item.type,
            source = item.source,
            title = item.title,
            content = item.content,
            image = item.image
        });
    }

    return copies;
}
/// <summary>
/// Reloads <c>all_news</c> from the file <c>output\news.xml</c> located under <c>directory</c>.
/// </summary>
/// <remarks>
/// The collection is cleared and <c>id</c> is reset to 1 before reading. One entry is added per
/// <c>news</c> element, with every field taken from that element's attributes. After the call
/// <c>id</c> holds the id of the last entry read, or 1 when the file contained none.
/// Exceptions raised while opening or parsing the file are not caught here.
/// </remarks>
public void readXml()
{
    all_news.Clear();
    id = 1;

    // The using block releases the underlying file even when parsing throws.
    using (XmlTextReader file = new XmlTextReader(directory + @"output\news.xml"))
    {
        while (file.Read())
        {
            // A closing </news> tag reports the same Name but carries no attributes;
            // without the node-type check it would be stored as an empty entry with id 0.
            if (file.NodeType != XmlNodeType.Element || file.Name != "news")
            {
                continue;
            }

            object_news add_News = new object_news();
            add_News.id = Convert.ToInt16(file.GetAttribute("id"));
            id = add_News.id; // remember the most recently read id
            add_News.type = file.GetAttribute("type");
            add_News.source = file.GetAttribute("source");
            add_News.title = file.GetAttribute("title");
            add_News.content = file.GetAttribute("content");
            add_News.image = file.GetAttribute("image");
            all_news.Add(add_News);
        }
    }
}
/// <summary>
/// Scrapes the front page of https://www.theverge.com/ and returns one news item per
/// <c>c-compact-river__entry</c> block found in its HTML.
/// </summary>
/// <remarks>
/// For every block the link, image and title are taken from the front-page HTML; the linked
/// article page is then downloaded and its <c>description</c> meta tag is used as the content.
/// The members <c>source</c>, <c>image</c>, <c>title</c>, <c>type</c> and <c>content</c> are
/// overwritten while processing each block. Progress is written to the debug output.
/// Blocks for which no link could be extracted are skipped. Network failures are not caught here.
/// </remarks>
/// <returns>The collected items; empty when no block matched.</returns>
public Collection<news> theverge_com()
{
    Debug.WriteLine("recvest time({0})", DateTime.Now);

    Collection<news> collection_news = new Collection<news>();
    string html = download_page(@"https://www.theverge.com/");

    // Patterns applied to the front page.
    string regular_class = @"(?inx)<div \s class \s* = \s*(?<q> ['""] )(c-compact-river__entry [^""]+ )\k<q>[^>]* >";
    string regular_pruf = @"(?inx)<a \s [^>]*href \s* = \s*(?<q> ['""] )(?<url> [^""]+ )\k<q>[^>]* >";
    string regular_img = @"(?inx)<img \s src \s* = (?<q> ['""] )(?<url> [^""]+)";
    string regular_title = @"(?inx)<h2 \s class \s* = \s*(?<q>['""]c-entry-box--compact__title[""]+)(.*href=)([""].*[""]>)(?<title> [^<]+)";
    // Pattern applied to each article page.
    string regular_content = @"(?inx)<meta \s name\s*= \s*(?<q>['""]description[""]) \s* content=[""](?<content> [^""]+)";

    Regex search_blok = new Regex(regular_class);
    Regex search_pruf = new Regex(regular_pruf);
    Regex search_img = new Regex(regular_img);
    Regex search_title = new Regex(regular_title);
    // Built once: the pattern is the same for every article.
    Regex search_content = new Regex(regular_content);

    foreach (Match all_classes in search_blok.Matches(html))
    {
        // Each search starts at the block's opening <div>, so it yields the first
        // link / image / title found at or after that position.
        Match match = search_pruf.Match(html, all_classes.Index);
        source = match.Groups["url"].ToString();

        match = search_img.Match(html, all_classes.Index);
        image = match.Groups["url"].ToString();

        match = search_title.Match(html, all_classes.Index);
        title = match.Groups["title"].ToString();

        type = "technology"; // the site only carries this one type of news

        // A failed link match leaves an empty URL, and WebRequest.Create throws on it;
        // skip the block instead of aborting the whole scrape.
        if (string.IsNullOrEmpty(source))
        {
            continue;
        }

        // Fetch the article page to read its description.
        string html_news = download_page(source);
        match = search_content.Match(html_news);
        content = match.Groups["content"].ToString();

        news element = new object_news();
        element.type = type;
        element.source = source;
        element.title = title;
        element.content = content;
        element.image = image;
        collection_news.Add(element);

        Debug.WriteLine(source);
        Debug.WriteLine(image);
        Debug.WriteLine(title);
        Debug.WriteLine(content);
        Debug.WriteLine(type);
        Debug.WriteLine("---------------------------");
    }

    Debug.WriteLine("recvest time({0})", DateTime.Now);
    return collection_news;
}

/// <summary>
/// Downloads <paramref name="url"/> and returns the response body as text,
/// disposing the response and its reader before returning.
/// </summary>
private static string download_page(string url)
{
    WebRequest request = WebRequest.Create(url);
    using (WebResponse response = request.GetResponse())
    using (StreamReader page = new StreamReader(response.GetResponseStream()))
    {
        return page.ReadToEnd();
    }
}