示例#1
0
        /// <summary>
        /// Extracts the listing fields from a crawled page and inserts them as a
        /// single row into the Property table.
        /// </summary>
        /// <param name="e">
        /// Crawl-completed event data; its <c>CrawledPage</c> supplies the parsed
        /// HTML document and the page URI.
        /// </param>
        /// <remarks>
        /// A field whose element is not found on the page is stored as an empty
        /// string; <c>suburb</c> is always stored as an empty string. If the first
        /// submit fails, the exception is written to the console and the submit is
        /// attempted once more; an exception from the second attempt propagates to
        /// the caller.
        /// </remarks>
        static void saveProperty(PageCrawlCompletedArgs e)
        {
            HtmlNode root = e.CrawledPage.HtmlDocument.DocumentNode;

            // Build the entity before opening the data context so that a parsing
            // failure cannot leave an undisposed context behind.
            Property p = new Property
            {
                pageUrl = e.CrawledPage.Uri.ToString(),
                address = GetTrimmedInnerText(root, "//span[@class='js-address']"),
                price = GetTrimmedInnerText(root, "//dd[@class='price']"),
                propertyType = GetTrimmedInnerText(root, "//dd[@class='propertytype']"),
                saleType = GetTrimmedInnerText(root, "//dd[@class='saleType']"),
                saleDate = GetTrimmedInnerText(root, "//dd[@class='saleDate']"),
                suburb = "",
                landSize = GetTrimmedInnerText(root, "//dd[@class='land']"),
                propertyFeature = GetTrimmedInnerText(root, "//p[@class='features']"),
                agents = GetTrimmedInnerText(root, "//ul[@class='cB-agentList']"),
                schoolData = GetTrimmedInnerText(root, "//div[@class='schoolData bdy collapsible collapsed']"),
                propertyDescription = GetTrimmedInnerText(root, "//div[@class='cT-productDescription']")
            };

            // 'using' guarantees disposal on every exit path, including a failed retry.
            using (var propertyContext = new PropertyDataContext(DemoParameters.connectionString))
            {
                propertyContext.Properties.InsertOnSubmit(p);

                try
                {
                    propertyContext.SubmitChanges();
                }
                catch (Exception ex)
                {
                    Console.WriteLine(ex);

                    // No corrective adjustment is made here: the same pending insert
                    // is resubmitted unchanged, so this only helps if the first
                    // failure was transient. A second failure is not caught.
                    propertyContext.SubmitChanges();
                }
            }
        }

        /// <summary>
        /// Returns the trimmed inner text of the first node matching
        /// <paramref name="xpath"/>, or an empty string when no node matches.
        /// </summary>
        /// <param name="root">Node the XPath query is evaluated against.</param>
        /// <param name="xpath">XPath expression selecting the wanted element.</param>
        private static string GetTrimmedInnerText(HtmlNode root, string xpath)
        {
            HtmlNode node = root.SelectSingleNode(xpath);
            return node != null ? node.InnerText.Trim() : "";
        }
示例#2
0
        /// <summary>
        /// Reads the <c>pageUrl</c> of every row in the Property table and passes
        /// each one, as a <see cref="Uri"/>, to <c>_scheduler.AddCrawledUri</c>.
        /// </summary>
        /// <remarks>
        /// Loading is best-effort: a stored value that is not a valid absolute URI
        /// is reported on the console and skipped, and any exception raised while
        /// querying or registering URLs is written to the console instead of being
        /// propagated.
        /// </remarks>
        public void LoadCrawledUrls()
        {
            using (var propertyContext = new PropertyDataContext(DemoParameters.connectionString))
            {
                try
                {
                    // Only the URL column is needed, so project it rather than
                    // materialising whole Property entities.
                    IQueryable<string> pageUrls =
                        from prop in propertyContext.Properties
                        select prop.pageUrl;

                    foreach (string pageUrl in pageUrls)
                    {
                        // A single malformed or null URL must not abort loading
                        // of the remaining rows.
                        Uri crawledUri;
                        if (Uri.TryCreate(pageUrl, UriKind.Absolute, out crawledUri))
                        {
                            _scheduler.AddCrawledUri(crawledUri);
                        }
                        else
                        {
                            Console.WriteLine("Skipping invalid stored page URL: " + pageUrl);
                        }
                    }
                }
                catch (Exception ex)
                {
                    // Keep the method non-throwing for load failures, but make
                    // the failure visible instead of discarding it.
                    Console.WriteLine(ex);
                }
            }
        }