// Updater

/// <summary>
/// Re-parses a successfully crawled product page and, when any tracked field
/// (price, image, UPC, description) differs from the stored product, writes the
/// new values back through <c>ProductManager.UpdateProduct</c> and reports one
/// updated product to the scheduler's parent.
/// </summary>
/// <param name="page">The downloaded document for <paramref name="url"/>.</param>
/// <param name="url">URL of the crawled page; also the key used to look up the stored product.</param>
/// <remarks>
/// The URL is added to the ignore list exactly once, by the <c>finally</c> block,
/// on every path out of this method (normal completion, early return, or exception).
/// Exceptions are logged and not rethrown.
/// </remarks>
private void doUpdateProduct(HtmlDocument page, string url)
{
    Program.Log(String.Format("Crawl of page succeeded {0}", url));

    try
    {
        if (!m_currentSite.IsProductPage(url))
        {
            // NOTE(review): only the message is emitted; no removal call is made in this method.
            Program.Log("Removing dead product from database.");
            return;
        }

        Product oldpn = ProductManager.GetProduct(url);
        Product pn = m_currentSite.Parse(page, new Uri(url));

        if (pn == null)
        {
            // Nothing could be parsed from the page, so there is nothing to compare.
            return;
        }

        if (oldpn == null)
        {
            // Without a stored product there is no baseline to diff against.
            Program.Log("(Error) No stored product found for " + url);
            return;
        }

        // Page is pointing to a new product: leave the stored record untouched.
        if (oldpn.Name != pn.Name)
        {
            return;
        }

        // The flag is raised only by a comparison that actually found a difference,
        // so unchanged products do not trigger a write or an update count.
        bool updated = false;

        if (oldpn.Price != pn.Price)
        {
            Program.Log("Found updated product price.");
            updated = true;
        }

        if (oldpn.Image != pn.Image)
        {
            Program.Log("Found updated product image.");
            updated = true;
        }

        if (oldpn.UPC != pn.UPC)
        {
            Program.Log("Found updated product UPC.");
            updated = true;
        }

        if (oldpn.Description != pn.Description)
        {
            Program.Log("Found updated product description.");
            updated = true;
        }

        if (updated)
        {
            ProductManager.UpdateProduct(pn, oldpn);
            m_Scheduler.Parent.SiteUpdateProduct(m_currentSite, 1);
        }
    }
    catch (Exception ex)
    {
        Program.Log("(Error) " + ex.ToString());
    }
    finally
    {
        // Runs on every exit path, including the early returns above.
        IgnoreList.Add(url);
    }
}
/// <summary>
/// Form load handler: initializes the graph, store and ignore-list subsystems,
/// hooks the logger up to <c>Program.Log</c>, creates the scheduler, and starts a
/// worker thread that registers every crawlable, non-API site via <c>AddSite</c>
/// together with its stored product count and last-crawl time.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void Main_Load(object sender, EventArgs e)
{
    Graph.Initialize();
    Store.Initialize();
    IgnoreList.Initialize();
    Logger.LogExtension = new LogPrint(Program.Log);
    scheduler = new Scheduler(this);

    Thread t = new Thread(() =>
    {
        // Total includes sites that are skipped below, so "cur" may never reach it;
        // the final Program.Progress call forces the bar to completion.
        int sites = Milkshake.Site.Sites.Count;
        int cur = 0;

        foreach (Milkshake.Site s in Milkshake.Site.Sites)
        {
            if (s.HasAPI || !s.Crawlable)
            {
                continue;
            }

            int count = 0;
            DateTimeOffset lc = DateTimeOffset.MinValue;

            try
            {
                // Look the store up once and read both statistics from the same result.
                var store = Store.Get(s.Name);
                int storedCount = store.ProductCount;
                DateTimeOffset storedLastCrawl = store.LastCrawl;

                // Assign only after both reads succeeded so a failure leaves the defaults intact.
                count = storedCount;
                lc = storedLastCrawl;
            }
            catch (Exception ex)
            {
                // Best effort: a site whose statistics cannot be read is still listed,
                // with a zero count and no last-crawl time.
                Program.Log("(Error) " + ex.ToString());
                count = 0;
            }

            AddSite(s, count, lc);
            cur++;

            Program.Status("Fetching site data from database (" + cur + " of " + sites + ")");
            Program.Progress(cur, sites);
        }

        Program.Progress(sites, sites);
    });

    t.Start();
}