Example #1
0
        /// <summary>
        /// Public entry point for crawling a single page: rejects unusable input,
        /// then delegates the actual work to <c>CrawlUrl</c>.
        /// </summary>
        /// <param name="RegNum">Registration number, forwarded unchanged to <c>CrawlUrl</c>.</param>
        /// <param name="Url">Address of the page to crawl.</param>
        /// <param name="cs">Crawling settings, forwarded unchanged to <c>CrawlUrl</c>.</param>
        /// <returns>
        /// The dictionary returned by <c>CrawlUrl</c>, or <c>null</c> when
        /// <paramref name="Url"/> or <paramref name="cs"/> is null, when
        /// <c>IsValidURL</c> rejects the URL, or when <c>CrawlUrl</c> itself returns null.
        /// </returns>
        public static Dictionary<string, string> Crawl(string RegNum, string Url, CrawlingSettings cs)
        {
            // Without settings CrawlUrl would dereference null before its own
            // try/catch, so report it the same way as every other bad input.
            if (Url == null || cs == null) return null;

            // Reject URLs that the validity check does not accept.
            if (!IsValidURL(Url)) return null;

            return CrawlUrl(RegNum, Url, cs);
        }
Example #2
0
        /// <summary>
        /// Downloads the page at <paramref name="Url"/>, extracts its links with
        /// <c>LinkFinder.Find</c>, and records the links that match the item type
        /// selected in <paramref name="cs"/>.
        /// </summary>
        /// <param name="RegNum">Registration number; not used by this method.</param>
        /// <param name="Url">
        /// Page address. <c>http://</c> is prepended when it does not start with "http".
        /// </param>
        /// <param name="cs">
        /// Settings whose <c>CrawlItemType</c> ("Social Media" or "Contact Info")
        /// selects which keys are collected. Must not be null.
        /// </param>
        /// <returns>
        /// A dictionary with one entry per key of the selected item type; each value
        /// is the last matching link found, or an empty string when none matched.
        /// The dictionary is empty for any other item type. Returns <c>null</c> when
        /// downloading or parsing the page throws.
        /// </returns>
        private static Dictionary<string, string> CrawlUrl(string RegNum, string Url, CrawlingSettings cs)
        {
            // Add a scheme only when the URL does not already begin with one.
            // Testing the prefix (rather than "contains") keeps scheme-less URLs
            // such as "example.com/http-guide" from being left without a scheme.
            if (!Url.StartsWith("http", System.StringComparison.OrdinalIgnoreCase))
                Url = "http://" + Url;

            // The result keys and the text searched for in each link are derived
            // from one list per item type so the two cannot drift apart.
            string[] keys;
            string needlePrefix;
            switch (cs.CrawlItemType)
            {
                case "Social Media":
                    keys = new string[] { "facebook", "twitter", "linkedin", "youtube" };
                    needlePrefix = "www.";
                    break;

                case "Contact Info":
                    keys = new string[] { "email", "phone", "fax" };
                    needlePrefix = "";
                    break;

                default:
                    keys = new string[0];
                    needlePrefix = "";
                    break;
            }

            Dictionary<string, string> results = new Dictionary<string, string>();
            foreach (string key in keys)
                results.Add(key, "");

            try
            {
                string html;
                // WebClient is IDisposable; release it as soon as the page is fetched.
                using (WebClient client = new WebClient())
                {
                    html = client.DownloadString(Url);
                }

                foreach (LinkItem link in LinkFinder.Find(html))
                {
                    string href = link.Href;
                    if (href == null) continue;

                    foreach (string key in keys)
                    {
                        // Ordinal, case-insensitive match: independent of the
                        // current culture (ToLower() is not, e.g. Turkish 'I').
                        if (href.IndexOf(needlePrefix + key, System.StringComparison.OrdinalIgnoreCase) >= 0)
                            results[key] = href; // a later match overwrites an earlier one
                    }
                }
            }
            catch
            {
                // Deliberate best effort: any download/parse failure is reported
                // to the caller as null rather than as an exception.
                return null;
            }

            return results;
        }
Example #3
0
 /// <summary>
 /// Creates the form: runs <c>InitializeComponent()</c> and then assigns a new
 /// <c>CrawlingSettings</c> instance to <c>cs</c>.
 /// </summary>
 public mainForm()
 {
     // NOTE(review): presumably the designer-generated control setup — confirm.
     InitializeComponent();
     // Start with a default-constructed settings object.
     cs = new CrawlingSettings();
 }