/// <summary>
/// Reads the total price shown in the basket summary of the checkout page.
/// </summary>
/// <param name="browser">Browser session whose default frame is searched for the basket summary.</param>
/// <returns>The number formed by the digits and dots found in the second div of the last ".summary-item" element.</returns>
/// <exception cref="Exception">Thrown when the summary container, its items, or the expected three inner divs cannot be found.</exception>
public static float GetTotalPriceOnCheckoutPage(BrowserSession.BrowserSession browser)
{
    // get summary container first
    var summary_container = browser.DefaultFrame.FindElementById("checkout-section-basket-summary");
    if (summary_container == null)
    {
        throw new Exception("cant find #checkout-section-basket-summary");
    }

    // get the last div with class "summary-item"
    var summary_item_containers = summary_container.FindElementByClassName("summary-item");
    if (summary_item_containers == null || summary_item_containers.Count < 1)
    {
        throw new Exception("cant find #checkout-section-basket-summary .summary-item");
    }
    BrowserSession.WebElement.WebElement div = summary_item_containers[summary_item_containers.Count - 1];

    // price is stored at the second div
    var divs = div.FindElementByTagName("div");
    if (divs == null || divs.Count != 3)
    {
        throw new Exception("unknown format inside #checkout-section-basket-summary .summary-item");
    }

    // keep only digits and the decimal point (drops currency symbols, spaces, thousands separators)
    var txt = string.Concat(divs[1].Text.Where(c => c == '.' || (c >= '0' && c <= '9')));

    // the filtered text always uses '.' as decimal separator, so it must be parsed
    // culture-independently; the machine's current culture may treat '.' as a group separator
    return float.Parse(txt, System.Globalization.CultureInfo.InvariantCulture);
}
/// <summary>
/// Navigates the browser to a URL on a worker thread and gives up when the navigation
/// does not finish within the given time.
/// </summary>
/// <param name="browser">Browser session to navigate.</param>
/// <param name="url">Address passed to <c>browser.GoTo</c>.</param>
/// <param name="timeout">Maximum time to wait for <c>GoTo</c> to return.</param>
/// <exception cref="TimeoutException">Thrown when <c>GoTo</c> has not returned within <paramref name="timeout"/>.</exception>
/// <remarks>
/// Any other exception raised by <c>GoTo</c> is rethrown on the calling thread, so the
/// callers' retry loops can handle it instead of the worker thread failing unobserved.
/// </remarks>
protected static void NagivateBrowserToUrlWithTimeout(BrowserSession.BrowserSession browser, string url, TimeSpan timeout)
{
    // NOTE: the event is intentionally not disposed - on the timeout path the worker
    // may still be running and could signal it later.
    AutoResetEvent ev = new AutoResetEvent(false);
    Exception navigation_error = null;

    var thread = new Thread(new ThreadStart(() =>
    {
        try
        {
            browser.GoTo(url);
        }
        catch (ThreadAbortException)
        {
            // raised by thread.Abort() on the timeout path below; the runtime re-raises
            // the abort after this block, so the event is never signalled in that case
        }
        catch (Exception ex)
        {
            // hand the failure over to the waiting caller instead of leaving it unhandled here
            navigation_error = ex;
        }
        ev.Set();
    }));
    thread.Start();

    if (!ev.WaitOne(timeout))
    {
        // ask the browser to stop loading, give it a moment, then kill the worker
        browser.Stop();
        Thread.Sleep(5000);
        thread.Abort();
        throw new TimeoutException();
    }

    if (navigation_error != null)
    {
        // rethrow on the calling thread while preserving the original stack trace
        System.Runtime.ExceptionServices.ExceptionDispatchInfo.Capture(navigation_error).Throw();
    }
}
/// <summary>
/// Hands a browser back by adding it to the <c>_Browsers</c> collection.
/// </summary>
/// <param name="browser">The browser session to put back.</param>
private void FinishUsingBrowser(BrowserSession.BrowserSession browser)
{
    var pool = _Browsers;

    // the collection object itself serves as the lock
    lock (pool)
    {
        pool.Add(browser);
    }
}
/// <summary>
/// Scrapes all items of this group through one shared bag: empties the bag, adds every
/// item, checks out with domestic delivery, derives each item's checkout price and
/// reports the result. The whole sequence is attempted up to 30 times.
/// </summary>
/// <param name="browser">Browser session used for all page interaction.</param>
/// <param name="update_status">Callback that receives progress and failure messages.</param>
protected override void ScrapeCore(BrowserSession.BrowserSession browser, UpdateStatusDelegate update_status)
{
    for (int retry = 0; retry < 30; retry++)
    {
        try
        {
            // stop current loading on browser
            browser.Stop();

            // clear bag
            RemoveAllItemsInBag(browser);

            // add all items to bag
            foreach (var item in this.Items)
            {
                bool sold_out;
                item.GetProductDetailAndAddToBag(browser, update_status, out sold_out);
                item.SoldOut = sold_out;
            }

            // check out
            Item.CheckoutAndSelectDomesticDelivery(browser);

            // set checkout price
            GetCheckoutPriceByRemovingItemOneByOne(browser);

            // report status
            foreach (var item in this.Items)
            {
                if (!item.Success.HasValue)
                {
                    item.Success = true;
                }

                // sold-out items get no progress record and no status line
                if (item.SoldOut.HasValue && item.SoldOut.Value)
                {
                    continue;
                }

                item.SoldOut = false;
                item.WriteProgress();
                update_status(System.Threading.Thread.CurrentThread.ManagedThreadId + ": " + string.Join(" ", item.Title, item.Color, item.OriginalPrice.ToString("f2"), item.CheckoutPrice.ToString("f2")));
            }
            return;
        }
        catch (Exception ex)
        {
            // report why this attempt failed instead of dropping the error silently;
            // skipped while the thread is being aborted so the abort is not delayed further
            if (!(ex is System.Threading.ThreadAbortException))
            {
                update_status("group failed retry=" + retry + " err=" + ex.Message);
            }

            // short pause before the next attempt
            System.Threading.Thread.Sleep(2000);
        }
    }

    // every attempt failed
    foreach (var item in this.Items)
    {
        item.WriteFailed();
    }
    update_status("group failed");
}
/// <summary>
/// Opens the checkout page and clicks the "delivery-type-domestic" element.
/// </summary>
/// <param name="browser">Browser session to drive.</param>
/// <exception cref="Exception">Thrown when the "delivery-type-domestic" element is not found.</exception>
public static void CheckoutAndSelectDomesticDelivery(BrowserSession.BrowserSession browser)
{
#if kill
    browser.GoTo(URL.CheckoutUrl);
#else // kill
    NagivateBrowserToUrlWithTimeout(browser, URL.CheckoutUrl, TimeSpan.FromMinutes(1));
#endif // kill

    // locate the domestic delivery option
    var frame = browser.DefaultFrame;
    var delivery_option = frame.FindElementById <BrowserSession.WebElement.Anchor>("delivery-type-domestic");
    if (delivery_option == null)
    {
        throw new Exception("cant find #delivery-type-domestic");
    }

    // click it, then pause for one second before returning
    delivery_option.ClickAsync().Wait();
    Task.Delay(1000).Wait();
}
/// <summary>
/// Works out the checkout price of every item that is not sold out by removing the
/// first consignment row from the checkout page and taking the drop in the page total.
/// The row marked "last-item" is not removed; it receives the remaining total.
/// </summary>
/// <param name="browser">Browser session currently showing the checkout page.</param>
private void GetCheckoutPriceByRemovingItemOneByOne(BrowserSession.BrowserSession browser)
{
    float remaining_total = Item.GetTotalPriceOnCheckoutPage(browser);

    for (int index = 0; index < this.Items.Length; index++)
    {
        var current = this.Items[index];

        // sold-out items are skipped
        if (current.SoldOut.HasValue && current.SoldOut.Value)
        {
            continue;
        }

        var top_row = browser.DefaultFrame.FindElementByClassName("consignment-item")[0];

        // the name is not used further; the lookups are kept so that a missing
        // info block or paragraph still fails here
        var top_row_info = top_row.FindElementByClassName("consignment-item-info")[0];
        var top_row_name = top_row_info.FindElementByTagName("p")[0].Text.Trim();

        if (top_row.HasClass("last-item"))
        {
            // nothing left to remove: whatever total remains belongs to this item
            current.CheckoutPrice = remaining_total;
            break;
        }

        Item.RemoveItemFromCheckoutPage(browser, 0);
        float total_after_removal = Item.GetTotalPriceOnCheckoutPage(browser);

        float price_of_current = remaining_total - total_after_removal;
        remaining_total = total_after_removal;
        current.CheckoutPrice = price_of_current;
    }
}
/// <summary>
/// Opens the bag page and keeps clicking the first "BasketRemove:" button until no such
/// button is found any more.
/// </summary>
/// <param name="browser">Browser session to drive.</param>
/// <exception cref="Exception">Thrown when remove buttons are still present after five minutes.</exception>
private void RemoveAllItemsInBag(BrowserSession.BrowserSession browser)
{
    // open bag page
#if kill
    browser.GoTo(URL.BagUrl);
#else // kill
    NagivateBrowserToUrlWithTimeout(browser, URL.BagUrl, TimeSpan.FromMinutes(1));
#endif // kill

    DateTime deadline = DateTime.Now + TimeSpan.FromMinutes(5);
    while (DateTime.Now < deadline)
    {
        // scroll back to the top of the page before looking for the button
        browser.ExecuteJavascriptAsync("document.body.scrollTop = document.documentElement.scrollTop = 0;").Wait();

        var next_remove_button = browser.DefaultFrame.FindElementByXPath("//span[starts-with(@id, 'BasketRemove:')]");
        if (next_remove_button == null)
        {
            // no remove button found: done
            return;
        }

        try
        {
            next_remove_button.ClickWithoutScrollAsync().Wait();
        }
        catch (Exception)
        {
            // the click failed; wait for the page to finish loading and try again
            while (!browser.IsCompletedLoading)
            {
                System.Threading.Thread.Sleep(1000);
            }
        }
    }

    throw new Exception("failed to clear item from bag");
}
/// <summary>
/// Runs <c>ScrapeCore</c> on a worker thread and aborts it when it does not finish in time.
/// </summary>
/// <param name="browser">Browser session handed to <c>ScrapeCore</c>.</param>
/// <param name="update_status">Status callback handed to <c>ScrapeCore</c>.</param>
/// <param name="timeout">Maximum time to wait for <c>ScrapeCore</c> to return.</param>
/// <exception cref="TimeoutException">Thrown when <c>ScrapeCore</c> has not returned within <paramref name="timeout"/>.</exception>
/// <remarks>
/// Any other exception escaping <c>ScrapeCore</c> is rethrown on the calling thread
/// instead of being left unhandled on the worker thread.
/// </remarks>
public void Scrape(BrowserSession.BrowserSession browser, UpdateStatusDelegate update_status, TimeSpan timeout)
{
    // NOTE: the event is intentionally not disposed - nothing waits for the worker to
    // exit after Abort(), so it could still touch the event.
    AutoResetEvent ev = new AutoResetEvent(false);
    Exception scrape_error = null;

    var thread = new Thread(new ThreadStart(() =>
    {
        try
        {
            this.ScrapeCore(browser, update_status);
        }
        catch (ThreadAbortException)
        {
            // raised by thread.Abort() on the timeout path below; the runtime re-raises
            // the abort after this block, so the event is never signalled in that case
        }
        catch (Exception ex)
        {
            // hand the failure over to the waiting caller instead of leaving it unhandled here
            scrape_error = ex;
        }
        ev.Set();
    }));
    thread.Start();

    if (!ev.WaitOne(timeout))
    {
        thread.Abort();
        throw new TimeoutException();
    }

    if (scrape_error != null)
    {
        // rethrow on the calling thread while preserving the original stack trace
        System.Runtime.ExceptionServices.ExceptionDispatchInfo.Capture(scrape_error).Throw();
    }
}
/// <summary>
/// Collects the items of this category: fetches all item URLs, creates one
/// <c>Item</c> per URL, keeps the first item seen for each id and stores the result in <c>Items</c>.
/// </summary>
/// <param name="browser">Browser session used to load the category page.</param>
/// <param name="update_status">Callback that receives the item count message.</param>
protected override void ScrapeCore(BrowserSession.BrowserSession browser, UpdateStatusDelegate update_status)
{
    Items = null;

    var items_by_id = new Dictionary <string, Item>();
    var urls = GetAllItemUrls(browser, update_status);

    // a null result leaves the category empty
    if (urls != null)
    {
        foreach (var url in urls)
        {
            var candidate = new Item(url);

            // only the first item with a given id is kept
            if (!items_by_id.ContainsKey(candidate.Id))
            {
                items_by_id.Add(candidate.Id, candidate);
            }
        }
    }

    this.Items = items_by_id.Values.ToList();
    update_status(this.Url + ": " + this.Items.Count + " items");
}
/// <summary>
/// Collects the href of every anchor found inside the "hproduct" elements of this
/// category page. The page load and collection are attempted up to ten times.
/// </summary>
/// <param name="browser">Browser session used to load the category page.</param>
/// <param name="update_status">Callback that receives the failure message when all attempts fail.</param>
/// <returns>The distinct hrefs found, or null when every attempt failed.</returns>
private string[] GetAllItemUrls(BrowserSession.BrowserSession browser, UpdateStatusDelegate update_status)
{
    for (int attempt = 0; attempt < 10; attempt++)
    {
        try
        {
            // stop current loading on browser
            browser.Stop();

            // go to the category page
#if kill
            browser.GoTo(this.Url);
#else // kill
            NagivateBrowserToUrlWithTimeout(browser, this.Url, TimeSpan.FromMinutes(1));
#endif // kill

            var found_urls = new HashSet <string>();
            while (true)
            {
                var products = browser.DefaultFrame.FindElementByClassName("hproduct");
                if (products == null)
                {
                    break;
                }

                bool found_new = false;
                foreach (var product in products)
                {
                    var links = product.FindElementByTagName <BrowserSession.WebElement.Anchor>("a");
                    if (links != null)
                    {
                        foreach (var link in links)
                        {
                            if (!found_urls.Contains(link.Href))
                            {
                                found_urls.Add(link.Href);
                                found_new = true;
                            }
                        }
                    }
                }

                // nothing new on this pass: no more items
                if (!found_new)
                {
                    break;
                }

#if future
                // scroll to bottom 5 times to load more items
                for (int i = 0; i < 5; i++)
                {
                    browser.ExecuteJavascriptAsync("window.scrollTo(0, document.body.scrollHeight);").Wait();
                }
#else // future
                break;
#endif // future
            }

            // an empty result is treated as a failed attempt
            if (found_urls.Count == 0)
            {
                throw new Exception("force retry");
            }
            return found_urls.ToArray();
        }
        catch (Exception)
        {
            // pause before the next attempt
            System.Threading.Thread.Sleep(5000);
        }
    }

    update_status("category failed: " + this.Url);
    return null;
}
/// <summary>
/// Opens this item's page, fills in the item's details (time, title, image URL, original
/// price, colour, sizes) from it and submits the add-to-bag form.
/// </summary>
/// <param name="browser">Browser session used for navigation and element lookups.</param>
/// <param name="update_status">Callback that receives a message when the cart counter never changes.</param>
/// <param name="sold_out">
/// Set to true when the add-to-bag form is missing or the cart counter does not change
/// within one minute of submitting the form; false otherwise.
/// </param>
/// <exception cref="Exception">
/// Thrown when an expected page element is missing, or when reading the cart counter
/// fails 50 times while waiting for it to change.
/// </exception>
public void GetProductDetailAndAddToBag(BrowserSession.BrowserSession browser, UpdateStatusDelegate update_status, out bool sold_out)
{
    sold_out = false;

    // navigate to the item page
#if kill
    browser.GoTo(this.Url);
#else // kill
    NagivateBrowserToUrlWithTimeout(browser, this.Url, TimeSpan.FromMinutes(1));
#endif // kill

    // find the detail element
    BrowserSession.WebElement.WebElement product_detail_container;
    {
        var product_detail_containers = browser.DefaultFrame.FindElementByClassName("product-detail");
        if (product_detail_containers == null || product_detail_containers.Count < 1)
        {
            throw new Exception("cant find .product-detail");
        }
        product_detail_container = product_detail_containers[0];
    }

    // time
    {
        this.TimeUTC = DateTime.UtcNow;
    }

    // title
    {
        var title_containers = product_detail_container.FindElementByTagName("h1");
        if (title_containers == null || title_containers.Count < 1)
        {
            throw new Exception("cant find .product-detail h1");
        }
        this.Title = title_containers[0].Text.Trim();
    }

    // image
    {
        var image_container = browser.DefaultFrame.FindElementById <BrowserSession.WebElement.Image>("main-image");
        if (image_container == null)
        {
            // the message names the id that was actually looked up
            throw new Exception("cant find #main-image");
        }
        this.ImageUrl = image_container.Src;
    }

    // original price
    {
        var price_info_containers = product_detail_container.FindElementByClassName("price");
        if (price_info_containers == null || price_info_containers.Count < 1)
        {
            throw new Exception("cant find .product-detail .price");
        }

        // keep only digits and the decimal point
        string txt = string.Concat(price_info_containers[0].Text.ToCharArray().Where(c => c == '.' || (c >= '0' && c <= '9')));

        // the filtered text always uses '.' as decimal separator, so parse it
        // independently of the machine's current culture
        this.OriginalPrice = float.Parse(txt, System.Globalization.CultureInfo.InvariantCulture);
    }

    // color
    {
        var color_container = product_detail_container.FindElementById("product-colour");
        if (color_container == null)
        {
            throw new Exception("cant find .product-detail #product-colour");
        }
        this.Color = color_container.Text.Trim();
    }

    // sizes
    {
        List <string> options = new List <string>();
        var available_sizes_container = product_detail_container.FindElementById("available-sizes");
        if (available_sizes_container != null)
        {
            // sizes are only read when the detail block contains exactly one <select>
            var select_containers = product_detail_container.FindElementByTagName("select");
            if (select_containers != null && select_containers.Count == 1)
            {
                var option_containers = available_sizes_container.FindElementByTagName <BrowserSession.WebElement.Option>("option");
                if (option_containers != null)
                {
                    foreach (var option_container in option_containers)
                    {
                        // options with value "0" are skipped
                        if (option_container.Value == "0")
                        {
                            continue;
                        }
                        options.Add(option_container.InnerHTML.Trim().Replace("\"", "\'"));

                        // select this size for later stage of add-to-bag
                        // NOTE(review): this runs for every option, so the last listed size ends up selected - confirm that is intended
                        browser.ExecuteJavascriptAsync("document.getElementById('product_id').value='" + option_container.Value + "';").Wait();
                    }
                }
            }
        }
        this.Sizes = options.ToArray();
    }

    // add to bag
    {
        var form = product_detail_container.FindElementById <BrowserSession.WebElement.Form>("add-to-bag");
        if (form == null)
        {
            // a missing add-to-bag form is treated as sold out
            sold_out = true;
            return;
        }

        var cart_count_container = browser.DefaultFrame.FindElementById("minicart-count");
        if (cart_count_container == null)
        {
            throw new Exception("cant find #minicart-count");
        }
        var original_count = cart_count_container.Text.Trim();

        form.SubmitAsync().Wait();

        // wait until cart is updated
        int failed = 0;
        bool added = false;
        DateTime start_wait = DateTime.Now;
        while (DateTime.Now < start_wait + TimeSpan.FromMinutes(1))
        {
            try
            {
                // look the counter up again on every poll
                cart_count_container = browser.DefaultFrame.FindElementById("minicart-count");
                var new_count = cart_count_container.Text.Trim();
                if (original_count != new_count)
                {
                    added = true;
                    break;
                }
            }
            catch
            {
                // reading the counter failed; tolerate up to 49 such failures
                failed++;
                if (failed >= 50)
                {
                    throw new Exception("cant update cart");
                }
            }
            Task.Delay(200).Wait();
        }

        if (!added)
        {
            // the counter never changed: treat the item as sold out
            sold_out = true;
            update_status(this.Title + " is sold out");
            return;
        }
    }
}
/// <summary>
/// Scrapes this single item: loads its details and adds it to the bag, opens the checkout
/// page in a checkout tab (an existing one or a new one), removes all consignment lines
/// but one and stores the page total as the checkout price. Attempted up to five times.
/// </summary>
/// <param name="browser">Browser session used for all page interaction.</param>
/// <param name="update_status">Callback that receives the result line or the failure message of each trial.</param>
protected override void ScrapeCore(BrowserSession.BrowserSession browser, UpdateStatusDelegate update_status)
{
    for (int trial = 0; trial < 5; trial++)
    {
        try
        {
            // stop current loading on browser
            browser.Stop();

            bool is_sold_out;
            GetProductDetailAndAddToBag(browser, update_status, out is_sold_out);
            if (is_sold_out)
            {
                SoldOut = true;
                return;
            }

            // get checkout price by checking out; remember where we came from first
            var home_tab = browser.CurrentTabHandle;
            try
            {
                // look for a tab that already shows the checkout page
                string checkout_tab = null;
                foreach (var handle in browser.AllTabHandles)
                {
                    browser.SwitchToTab(handle);
                    if (browser.Url == URL.CheckoutUrl)
                    {
                        checkout_tab = handle;
                        break;
                    }
                }

                // no checkout tab yet: create one and switch to it
                if (checkout_tab == null)
                {
                    checkout_tab = browser.CreateNewTabAsync().Result;
                    browser.SwitchToTab(checkout_tab);
                }

                CheckoutAndSelectDomesticDelivery(browser);

                // remove all items in bag except the last one
                var remove_links = browser.DefaultFrame.FindElementByClassName <BrowserSession.WebElement.Anchor>("remove-consignment-line");
                if (remove_links == null || remove_links.Count < 1)
                {
                    throw new Exception("cant find .remove-consignment-line");
                }
                for (int removed = 0; removed < remove_links.Count - 1; removed++)
                {
                    // always remove the row that is currently first
                    RemoveItemFromCheckoutPage(browser, 0);
                }

                // get the total price in cart
                this.CheckoutPrice = GetTotalPriceOnCheckoutPage(browser);
            }
            finally
            {
                // switch back to original tab
                browser.SwitchToTab(home_tab);
            }

            WriteProgress();
            update_status(string.Join(" ", this.Title, this.Color, this.OriginalPrice.ToString("f2"), this.CheckoutPrice.ToString("f2")));
            Success = true;
            SoldOut = false;
            return;
        }
        catch (Exception ex)
        {
            update_status("failed=" + this.Url + " trial=" + trial + " err=" + ex.Message);
        }
    }

    // all trials failed
    WriteFailed();
    Success = false;
}
/// <summary>
/// Removes one consignment line from the checkout page: clicks its remove link, waits for
/// the confirmation button of the "remove-consignment-modal" to appear, then keeps
/// clicking that button until it is gone.
/// </summary>
/// <param name="browser">Browser session currently showing the checkout page.</param>
/// <param name="item_index">Index into the list of ".remove-consignment-line" links.</param>
/// <exception cref="Exception">
/// Thrown when the remove links or the modal are missing, or when the confirmation
/// button does not appear or does not disappear within one minute each.
/// </exception>
public static void RemoveItemFromCheckoutPage(BrowserSession.BrowserSession browser, int item_index)
{
    var remove_links = browser.DefaultFrame.FindElementByClassName <BrowserSession.WebElement.Anchor>("remove-consignment-line");
    if (remove_links == null || remove_links.Count < 1)
    {
        throw new Exception("cant find .remove-consignment-line");
    }

    // click cross button
    remove_links[item_index].ClickWithoutScrollAsync().Wait();

    var modal = browser.DefaultFrame.FindElementById("remove-consignment-modal");
    if (modal == null)
    {
        throw new Exception("cant find #remove-consignment-modal");
    }

    // returns the single ".btn-primary" button of the modal, or null when it cannot
    // be obtained (not exactly one match, or the lookup itself fails)
    Func <BrowserSession.WebElement.Button> find_confirm_button = () =>
    {
        try
        {
            var candidates = modal.FindElementByClassName <BrowserSession.WebElement.Button>("btn-primary");
            if (candidates == null || candidates.Count != 1)
            {
                throw new Exception("cant find #remove-consignment-modal .btn-primary");
            }
            return candidates[0];
        }
        catch // stale exception
        {
            return null;
        }
    };

    // phase 1: poll every 100 ms until the confirmation button shows up
    DateTime deadline = DateTime.Now + TimeSpan.FromMinutes(1);
    while (DateTime.Now < deadline)
    {
        try
        {
            if (find_confirm_button() != null)
            {
                break;
            }
        }
        catch
        {
        }
        Task.Delay(100).Wait();
    }
    if (find_confirm_button() == null)
    {
        throw new Exception("yes button doesnt appear");
    }

    // phase 2: click the button every 500 ms until it can no longer be found
    deadline = DateTime.Now + TimeSpan.FromMinutes(1);
    while (DateTime.Now < deadline)
    {
        try
        {
            var confirm_button = find_confirm_button();
            if (confirm_button == null)
            {
                break;
            }
            confirm_button.ClickWithoutScrollAsync().Wait();
        }
        catch
        {
        }
        Task.Delay(500).Wait();
    }
    if (find_confirm_button() != null)
    {
        throw new Exception("yes button doesnt disappear");
    }
}
/// <summary>
/// Runs the scrape on the calling thread by delegating directly to <c>ScrapeCore</c>.
/// </summary>
/// <param name="browser">Browser session passed through to <c>ScrapeCore</c>.</param>
/// <param name="update_status">Status callback passed through to <c>ScrapeCore</c>.</param>
public void Scrape(BrowserSession.BrowserSession browser, UpdateStatusDelegate update_status)
{
    this.ScrapeCore(browser, update_status);
}
/// <summary>
/// Performs the actual scraping work; each derived class supplies its own implementation.
/// Invoked by <c>Scrape</c>.
/// </summary>
/// <param name="browser">Browser session the implementation works with.</param>
/// <param name="update_status">Callback the implementation uses to report status messages.</param>
protected abstract void ScrapeCore(BrowserSession.BrowserSession browser, UpdateStatusDelegate update_status);