コード例 #1
0
ファイル: ParseUtil.cs プロジェクト: squarewave24/Scraper
 /// <summary>
 /// Resolves the query-string value stored under <paramref name="queryStringKey"/> in
 /// <c>vm.QueryPairs</c> to its entry in <paramref name="lookupCollection"/>.
 /// </summary>
 /// <param name="vm">View model whose <c>QueryPairs</c> holds the parsed query string.</param>
 /// <param name="lookupCollection">Optional map from raw query values to replacement values; may be null.</param>
 /// <param name="queryStringKey">Name of the query-string parameter to resolve.</param>
 /// <returns>
 /// <c>null</c> when the raw value is <c>"0"</c> or null; the mapped value when
 /// <paramref name="lookupCollection"/> contains the raw value; otherwise the raw value itself.
 /// </returns>
 public static string LookupQueryStringValue(MainViewModel vm, Dictionary<string, string> lookupCollection, string queryStringKey)
 {
     // Read the raw value once instead of re-indexing QueryPairs for every use.
     string rawValue = vm.QueryPairs[queryStringKey];

     // "0" yields no value (presumably the "nothing selected" sentinel - confirm with callers).
     // A null raw value is also treated as "no value": passing it on as a dictionary key
     // would throw ArgumentNullException.
     if (rawValue == null || rawValue == "0")
         return null;

     // Single dictionary probe instead of ContainsKey followed by the indexer.
     string mappedValue;
     if (lookupCollection != null && lookupCollection.TryGetValue(rawValue, out mappedValue))
         return mappedValue;

     // No mapping available: fall back to the raw query value.
     return rawValue;
 }
コード例 #2
0
ファイル: HtmlUtil.cs プロジェクト: squarewave24/Scraper
        /// <summary>
        /// Returns the HTML document for <c>vm.Url</c>. When <c>CheckCache</c> reports a hit for
        /// the escaped URL, the document is loaded from the cached content; otherwise it is
        /// downloaded with <c>HtmlWeb</c> and saved under the cache file path.
        /// </summary>
        /// <param name="vm">View model supplying <c>Url</c>; its <c>CacheStatus</c> is updated to describe the source used.</param>
        /// <returns>The loaded document.</returns>
        public static HtmlDocument GetUrlContent(MainViewModel vm)
        {
            var cacheKey = EscapeFileName(vm.Url);

            if (!CheckCache(cacheKey))
            {
                // Cache miss: fetch from the web with HtmlWeb's own caching switched off.
                var web = new HtmlWeb { UsingCache = false };
                HtmlDocument downloaded = web.Load(vm.Url);

                var cachedSuffix = web.FromCache ? "cached" : "";
                vm.CacheStatus = "Loaded from Url " + cachedSuffix;

                // Keep a copy so the next request for this URL is served from the cache.
                downloaded.Save(GetFilePath(cacheKey));
                return downloaded;
            }

            // Cache hit: rebuild the document from the stored markup.
            vm.CacheStatus = "From Cache";
            var cachedDocument = new HtmlDocument();
            cachedDocument.LoadHtml(ReadFromCache(cacheKey));
            return cachedDocument;
        }
コード例 #3
0
ファイル: ParseUtil.cs プロジェクト: squarewave24/Scraper
        /// <summary>
        /// Loads the document for <c>vm.Url</c> via <c>vm.LoadDoc()</c> and records how long the
        /// load took in <c>vm.RetrieveStatus</c>.
        /// </summary>
        /// <param name="vm">View model supplying <c>Url</c> and receiving the status text.</param>
        /// <exception cref="UriFormatException">Thrown when <c>vm.Url</c> is not a valid absolute URI.</exception>
        public static void ParseDropdowns(MainViewModel vm)
        {
            // The host itself is not used here; the Uri is still constructed so that a
            // malformed vm.Url keeps failing up front, before any loading starts.
            new Uri(vm.Url);

            var watch = Stopwatch.StartNew();

            vm.LoadDoc();

            // Take one snapshot so minutes and seconds describe the same instant.
            var elapsed = watch.Elapsed;
            vm.RetrieveStatus = string.Format("{0} retrieved in {1}:{2}", vm.Url, elapsed.Minutes, elapsed.Seconds);
        }
コード例 #4
0
ファイル: ParseUtil.cs プロジェクト: squarewave24/Scraper
        /// <summary>
        /// Loads the page for <paramref name="vm"/>, fills in the page-level fields (industry,
        /// country, city, page number) from the query string, and builds one <c>Contact</c> per
        /// <c>td</c> element with class <c>submain</c>.
        /// </summary>
        /// <param name="vm">View model supplying the document and query pairs and receiving the results.</param>
        /// <param name="persistToDatabase">When true, the parsed contacts are passed to <c>ConsultantDataAccess.SaveConsultant</c>.</param>
        /// <returns><c>true</c> when at least one consultant element was found; otherwise <c>false</c>.</returns>
        public static bool ParseConsultants(MainViewModel vm, bool persistToDatabase)
        {
            var watch = Stopwatch.StartNew();

            vm.LoadDoc();

            // page expertise
            vm.PageIndustry = string.Format("{0}", LookupQueryStringValue(vm, vm.Industries, "Expertise"));
            // page country
            vm.PageCountry = string.Format("{0}", LookupQueryStringValue(vm, vm.Countries, "Location"));
            // page city
            vm.PageCity = string.Format("{0}", LookupQueryStringValue(vm, vm.Cities, "Office"));

            // page number: the value comes from the URL, so a missing, empty or non-numeric
            // "page" yields null instead of throwing.
            int pageNumber;
            vm.PageNumber = Int32.TryParse(vm.QueryPairs["page"], out pageNumber) ? (int?)pageNumber : null;

            // page consultants
            var consultantElements = DomUtil.GetElementsByClass(vm.Doc, "td", "submain");
            if (consultantElements == null)
                return false;

            // Materialize once so the emptiness check and the loop below do not
            // enumerate the source twice.
            var consultantNodes = consultantElements.ToList();
            if (consultantNodes.Count == 0)
                return false;

            var sb = new StringBuilder();
            var contacts = new List<Contact>();
            foreach (var consultantElement in consultantNodes) {
                var c = new Contact();
                c.Industry = vm.PageIndustry;
                c.Country = vm.PageCountry;
                c.City = vm.PageCity;
                c.Source = vm.GetHost();

                foreach (var child in consultantElement.ChildNodes) {
                    var text = child.InnerText;
                    switch (child.Name) {

                        case "#text":
                            // Only the first non-empty text node becomes the title.
                            if (string.IsNullOrEmpty(c.Title))
                                c.Title = text;
                            break;
                        case "a":
                            // Link text containing '@' is taken as the e-mail address,
                            // any other link text as the consultant's name.
                            if (text.Contains('@'))
                                c.Email = text;
                            else
                                c.Name = text;

                            // Fetch the attribute once; ordinal search because "/Bios" is a
                            // URL fragment, not culture-sensitive text.
                            var href = child.Attributes["href"];
                            if (href != null) {
                                var hrefValue = href.Value;
                                var biosIndex = hrefValue.IndexOf("/Bios", StringComparison.Ordinal);
                                if (biosIndex > -1)
                                    c.BioPage = string.Format("http://{0}{1}", c.Source, hrefValue.Substring(biosIndex));
                            }
                            break;

                    }
                }
                sb.Append(c.ToString()).Append("\n");
                contacts.Add(c);
            }
            if (persistToDatabase)
                new ConsultantDataAccess().SaveConsultant(contacts);

            // NOTE(review): fire-and-forget task; the view-model properties are assigned from
            // whatever scheduler runs it and any exception is unobserved - confirm intended.
            Task.Factory.StartNew(() => {
                // One snapshot so minutes and seconds describe the same instant.
                var elapsed = watch.Elapsed;
                vm.RetrieveStatus = string.Format("{0} retrieved in {1}:{2}", vm.Url, elapsed.Minutes, elapsed.Seconds);
                vm.PageConsultants = sb.ToString();
            });

            return true;
        }