Example #1
0
        /// <summary>
        /// Downloads http://www.vk.se/ and counts the distinct links found on it that
        /// point back into the same site.
        /// </summary>
        /// <remarks>
        /// Each accepted link is put in exactly one bucket: the one for nodes accepted by
        /// <c>IsLatestNewsNode</c>, the one for nodes accepted by <c>IsMainColumnNode</c>
        /// (these are additionally tallied per block number), or the bucket for everything
        /// else. De-duplication is per bucket, so the same URL may be counted once in each.
        /// </remarks>
        /// <returns>
        /// A <see cref="StartpageData"/> with total, per-bucket and per-block link counts,
        /// including how many of those links start with <c>http://www.vk.se/plus/</c>.
        /// </returns>
        public static StartpageData ParseStartpage()
        {
            const string siteRoot = "http://www.vk.se/";
            const string plusRoot = "http://www.vk.se/plus/";

            string html;
            // WebClient is IDisposable; release it as soon as the page has been fetched.
            using (var wc = new WebClient())
            {
                html = wc.DownloadString(siteRoot);
            }

            var doc = new HtmlDocument();
            doc.LoadHtml(html);

            var blockData = new List<BlockData>();
            // Only the distinct counts are used afterwards, so sets replace the
            // "Contains, then Add" lists and avoid a linear scan per link.
            var links = new HashSet<string>();
            var latestNewsLinks = new HashSet<string>();
            var mainNewsLinks = new HashSet<string>();

            // SelectNodes returns null (not an empty collection) when nothing matches.
            var anchors = doc.DocumentNode.SelectNodes("//a[@href]");
            if (anchors != null)
            {
                foreach (var link in anchors)
                {
                    if (!IsValidNode(link))
                        continue;

                    var attr = link.Attributes["href"].Value;

                    if (string.IsNullOrEmpty(attr))
                        continue;

                    // URLs are machine data: compare ordinally, not by current culture.
                    if (attr.StartsWith("mailto:", StringComparison.Ordinal))
                        continue;

                    // Drop the fragment so "page#a" and "page#b" count as the same link.
                    var fragmentStart = attr.IndexOf('#');
                    if (fragmentStart >= 0)
                        attr = attr.Substring(0, fragmentStart);

                    // Only absolute links back into the site are counted.
                    if (!attr.StartsWith(siteRoot, StringComparison.Ordinal))
                        continue;

                    if (Blacklist.Contains(attr))
                        continue;

                    if (IsLatestNewsNode(link))
                    {
                        latestNewsLinks.Add(attr);
                    }
                    else if (IsMainColumnNode(link))
                    {
                        // Add returns false for a URL already seen in this bucket,
                        // so each main-column URL is tallied for one block only.
                        if (mainNewsLinks.Add(attr))
                        {
                            var blockNr = GetBlockNumber(link);
                            var block = blockData.FirstOrDefault(b => b.BlockNr == blockNr);

                            if (block == null)
                            {
                                block = new BlockData { BlockNr = blockNr };
                                blockData.Add(block);
                            }

                            block.TotalLinks++;
                            if (attr.StartsWith(plusRoot, StringComparison.Ordinal))
                                block.TotalPlusLinks++;
                        }
                    }
                    else
                    {
                        links.Add(attr);
                    }
                }
            }

            var otherPlus = links.Count(l => l.StartsWith(plusRoot, StringComparison.Ordinal));
            var latestPlus = latestNewsLinks.Count(l => l.StartsWith(plusRoot, StringComparison.Ordinal));
            var mainPlus = mainNewsLinks.Count(l => l.StartsWith(plusRoot, StringComparison.Ordinal));

            var data = new StartpageData();
            data.TotalLinks = latestNewsLinks.Count + links.Count + mainNewsLinks.Count;
            data.TotalLatestNewsLinks = latestNewsLinks.Count;
            data.TotalMainLinks = mainNewsLinks.Count;

            // "+=" keeps whatever starting value StartpageData gives these counters,
            // exactly as incrementing them one link at a time would.
            data.TotalPlusLinks += otherPlus + latestPlus + mainPlus;
            data.TotalLatestNewsPlusLinks += latestPlus;
            data.TotalMainPlusLinks += mainPlus;

            data.Blocks.AddRange(blockData);
            return data;
        }
Example #2
0
 /// <summary>
 /// Inserts <paramref name="data"/> into the collection named by <c>_collection</c>
 /// in the "vkminus" database of the server addressed by <c>ConnectionString</c>.
 /// </summary>
 /// <param name="data">The start page statistics to store.</param>
 public void Save(StartpageData data)
 {
     // A new client is created on every call; the chain below resolves
     // server -> database -> collection and then performs the insert.
     new MongoClient(ConnectionString)
         .GetServer()
         .GetDatabase("vkminus")
         .GetCollection(_collection)
         .Insert(data);
 }