示例#1
0
        // Updater
        /// <summary>
        /// Re-parses a crawled page for a product that is already stored and, when
        /// any tracked field differs from the stored copy, writes the new values
        /// back through <c>ProductManager.UpdateProduct</c>.
        /// </summary>
        /// <param name="page">Parsed HTML document of the crawled page.</param>
        /// <param name="url">
        /// URL of the crawled page. It is used as the lookup key for the stored
        /// product and is converted with <c>new Uri(url)</c> for the site parser.
        /// </param>
        /// <remarks>
        /// The URL is added to the ignore list on every exit path by the
        /// <c>finally</c> block. Any exception raised while processing is logged
        /// and not rethrown.
        /// </remarks>
        private void doUpdateProduct(HtmlDocument page, string url)
        {
            Program.Log(String.Format("Crawl of page succeeded {0}", url));

            try
            {
                if (!m_currentSite.IsProductPage(url))
                {
                    // NOTE(review): only the message is emitted here; no removal
                    // call is made in this method.
                    Program.Log("Removing dead product from database.");
                    return;
                }

                Product oldpn = ProductManager.GetProduct(url);
                Product pn = m_currentSite.Parse(page, new Uri(url));

                // The parser produced nothing for this page: nothing to compare.
                if (pn == null)
                {
                    return;
                }

                // Without a stored product there is nothing to diff against;
                // report it explicitly instead of failing on a null dereference.
                if (oldpn == null)
                {
                    Program.Log("(Error) No stored product found for " + url);
                    return;
                }

                // A different name means the page now points to a different
                // product, so it is not treated as an update of the stored one.
                if (oldpn.Name != pn.Name)
                {
                    return;
                }

                // The flag is raised only when a field really differs, so an
                // unchanged product does not trigger a write or an update count.
                bool updated = false;

                if (oldpn.Price != pn.Price)
                {
                    Program.Log("Found updated product price.");
                    updated = true;
                }

                if (oldpn.Image != pn.Image)
                {
                    Program.Log("Found updated product image.");
                    updated = true;
                }

                if (oldpn.UPC != pn.UPC)
                {
                    Program.Log("Found updated product UPC.");
                    updated = true;
                }

                if (oldpn.Description != pn.Description)
                {
                    Program.Log("Found updated product description.");
                    updated = true;
                }

                if (updated)
                {
                    ProductManager.UpdateProduct(pn, oldpn);
                    m_Scheduler.Parent.SiteUpdateProduct(m_currentSite, 1);
                }
            }
            catch (Exception ex)
            {
                Program.Log("(Error) " + ex.ToString());
            }
            finally
            {
                // Runs for every return above and after the catch, so the URL is
                // always recorded without separate Add calls inside the try.
                IgnoreList.Add(url);
            }
        }
示例#2
0
        /// <summary>
        /// Event handler (named for the main form's Load event) that initializes
        /// the graph, store and ignore-list subsystems, routes logger output to
        /// <c>Program.Log</c>, creates the scheduler, and starts a worker thread
        /// that registers every crawlable, non-API site through <c>AddSite</c>.
        /// </summary>
        /// <param name="sender">Event source; not used.</param>
        /// <param name="e">Event data; not used.</param>
        private void Main_Load(object sender, EventArgs e)
        {
            Graph.Initialize();
            Store.Initialize();
            IgnoreList.Initialize();

            Logger.LogExtension = new LogPrint(Program.Log);

            scheduler = new Scheduler(this);

            Thread t = new Thread(() =>
            {
                // Total includes sites skipped below, so the running counter can
                // finish short of it; the final Progress call closes the gap.
                int sites = Milkshake.Site.Sites.Count;
                int cur   = 0;

                foreach (Milkshake.Site s in Milkshake.Site.Sites)
                {
                    if (s.HasAPI || !s.Crawlable)
                    {
                        continue;
                    }

                    int count         = 0;
                    DateTimeOffset lc = DateTimeOffset.MinValue;
                    try
                    {
                        // Single lookup; both statistics come from the same record.
                        var store = Store.Get(s.Name);

                        count = store.ProductCount;
                        lc    = store.LastCrawl;

                        // NOTE: These are the stored statistics. An exact recount
                        // (a distinct count over the site's Product nodes, written
                        // back to the Store node) is a slower alternative that is
                        // only needed if the statistics are not kept up to date.
                    }
                    catch (Exception ex)
                    {
                        // Best effort: the site is still listed, with defaults,
                        // but the failure is no longer hidden.
                        Program.Log("(Error) Could not read stored statistics for site " + s.Name + ": " + ex.Message);
                        count = 0;
                    }

                    AddSite(s, count, lc);

                    cur++;

                    Program.Status("Fetching site data from database (" + cur + " of " + sites + ")");
                    Program.Progress(cur, sites);
                }

                Program.Progress(sites, sites);
            });

            t.Start();
        }