Esempio n. 1
0
        /// <summary>
        /// Downloads the page at <paramref name="url"/>, records it in the URL table,
        /// and enqueues every not-yet-visited link found in its content.
        /// </summary>
        /// <remarks>
        /// Pages for which no title can be extracted are ignored entirely: nothing is
        /// stored and none of their links are enqueued.
        /// </remarks>
        /// <param name="url">Address of the page to download and process.</param>
        private void ParseUrl(string url)
        {
            string pageContent = DownloadText(url);
            string title       = searcher.GetTitle(pageContent);

            // If the page yields no title (null or empty), ignore the page.
            if (string.IsNullOrEmpty(title))
            {
                return;
            }

            DateTime date = searcher.GetPubDate(pageContent);
            connection.AddToUrlTable(UrlEntity.CreateEntitiesFromString(title, url, date));

            foreach (Match match in searcher.GetAllLinks(pageContent))
            {
                // Capture group 1 of each match holds the link target.
                string linkToAdd = match.Groups[1].Value;

                // URL prefixes are compared ordinally; culture rules must not apply.
                if (linkToAdd.StartsWith("//", StringComparison.Ordinal))
                {
                    // Protocol-relative link: assume http.
                    linkToAdd = "http:" + linkToAdd;
                }

                // A protocol-relative link now starts with "http:", so it skips this branch.
                if (linkToAdd.StartsWith("/", StringComparison.Ordinal))
                {
                    // Root-relative link: prepend the root of the domain currently being parsed.
                    if (url.Contains("cnn.com"))
                    {
                        linkToAdd = "http://cnn.com" + linkToAdd;
                    }
                    else if (url.Contains("bleacherreport.com"))
                    {
                        linkToAdd = "http://bleacherreport.com" + linkToAdd;
                    }
                    else
                    {
                        // Unknown domain: the link cannot be resolved, so skip it.
                        continue;
                    }
                }

                if (!visitedUrls.Contains(linkToAdd))
                {
                    AddToUrlQueue(linkToAdd);
                }
            }
        }