/// <summary>
/// Downloads the vk.se start page and tallies the distinct on-site links it contains.
/// </summary>
/// <remarks>
/// Every <c>&lt;a href&gt;</c> accepted by <c>IsValidNode</c> is considered. Empty and
/// <c>mailto:</c> targets are skipped, any <c>#fragment</c> is cut off, and only URLs that
/// start with <c>http://www.vk.se/</c> and are not in <c>Blacklist</c> are counted.
/// Each URL is counted once per category (latest news, main column, other). For
/// main-column links a per-block tally is also kept, keyed by <c>GetBlockNumber</c>.
/// URLs starting with <c>http://www.vk.se/plus/</c> are additionally counted as "plus" links.
/// </remarks>
/// <returns>
/// A new <see cref="StartpageData"/> holding the total, latest-news and main-column link
/// counts, the matching "plus" counts, and the per-block data.
/// </returns>
public static StartpageData ParseStartpage()
{
    const string SiteRoot = "http://www.vk.se/";
    const string PlusRoot = "http://www.vk.se/plus/";

    string html;
    // WebClient is IDisposable: release it as soon as the page has been fetched.
    using (var wc = new WebClient())
    {
        html = wc.DownloadString(SiteRoot);
    }

    var doc = new HtmlDocument();
    doc.LoadHtml(html);

    var blockData = new List<BlockData>();

    // Only membership and counts are needed, so sets replace the linear List.Contains scans.
    var links = new HashSet<string>(StringComparer.Ordinal);
    var latestNewsLinks = new HashSet<string>(StringComparer.Ordinal);
    var mainNewsLinks = new HashSet<string>(StringComparer.Ordinal);

    // SelectNodes returns null (not an empty collection) when no node matches the XPath.
    var anchors = doc.DocumentNode.SelectNodes("//a[@href]");
    if (anchors != null)
    {
        foreach (var link in anchors)
        {
            if (!IsValidNode(link))
            {
                continue;
            }

            var attr = link.Attributes["href"].Value;
            if (string.IsNullOrEmpty(attr))
            {
                continue;
            }

            if (attr.StartsWith("mailto:", StringComparison.Ordinal))
            {
                continue;
            }

            // Drop the fragment so that "page" and "page#comments" count as the same link.
            var fragmentStart = attr.IndexOf('#');
            if (fragmentStart >= 0)
            {
                attr = attr.Substring(0, fragmentStart);
            }

            if (!attr.StartsWith(SiteRoot, StringComparison.Ordinal))
            {
                continue;
            }

            if (Blacklist.Contains(attr))
            {
                continue;
            }

            if (IsLatestNewsNode(link))
            {
                latestNewsLinks.Add(attr);
            }
            else if (IsMainColumnNode(link))
            {
                // Add returns false for a URL already seen in the main column,
                // so each URL contributes to its block's tally only once.
                if (mainNewsLinks.Add(attr))
                {
                    var blockNr = GetBlockNumber(link);
                    var block = blockData.FirstOrDefault(b => b.BlockNr == blockNr);
                    if (block == null)
                    {
                        block = new BlockData { BlockNr = blockNr };
                        blockData.Add(block);
                    }

                    block.TotalLinks++;
                    if (attr.StartsWith(PlusRoot, StringComparison.Ordinal))
                    {
                        block.TotalPlusLinks++;
                    }
                }
            }
            else
            {
                links.Add(attr);
            }
        }
    }

    var data = new StartpageData();
    data.TotalLinks = latestNewsLinks.Count + links.Count + mainNewsLinks.Count;
    data.TotalLatestNewsLinks = latestNewsLinks.Count;
    data.TotalMainLinks = mainNewsLinks.Count;

    foreach (var link in links)
    {
        if (link.StartsWith(PlusRoot, StringComparison.Ordinal))
        {
            data.TotalPlusLinks++;
        }
    }

    foreach (var link in latestNewsLinks)
    {
        if (link.StartsWith(PlusRoot, StringComparison.Ordinal))
        {
            data.TotalPlusLinks++;
            data.TotalLatestNewsPlusLinks++;
        }
    }

    foreach (var link in mainNewsLinks)
    {
        if (link.StartsWith(PlusRoot, StringComparison.Ordinal))
        {
            data.TotalPlusLinks++;
            data.TotalMainPlusLinks++;
        }
    }

    data.Blocks.AddRange(blockData);
    return data;
}
/// <summary>
/// Inserts <paramref name="data"/> into the collection named by <c>_collection</c>
/// in the "vkminus" database reached through <c>ConnectionString</c>.
/// </summary>
/// <param name="data">The start page statistics to insert.</param>
public void Save(StartpageData data)
{
    // Resolve client -> server -> database -> collection in one fluent chain, then insert.
    new MongoClient(ConnectionString)
        .GetServer()
        .GetDatabase("vkminus")
        .GetCollection(_collection)
        .Insert(data);
}