示例#1
0
        /// <summary>
        /// Downloads a community home-list page and reads the total page count from its pagination box.
        /// </summary>
        /// <param name="url">Address of the list page to download.</param>
        /// <returns>
        /// 1 when the page has no <c>div.house-lst-page-box</c> element; otherwise the <c>totalPage</c>
        /// value from the JSON stored in that element's <c>page-data</c> attribute.
        /// </returns>
        /// <exception cref="Exception">
        /// Wraps any failure raised while parsing or converting the page data; the raw attribute
        /// value is included in the message.
        /// </exception>
        public static async Task <int> ReadCommunityHomeListPageNumber(string url)
        {
            IDocument page     = await WebPageReader.GetPageAsync(url);
            var       pagerBox = page.QuerySelector("div.house-lst-page-box");

            // No pagination box on the page: report a single page.
            if (pagerBox == null)
            {
                return 1;
            }

            var rawPageData = pagerBox.GetAttribute("page-data");

            try
            {
                JObject parsed = JObject.Parse(rawPageData);

                return Convert.ToInt32(parsed["totalPage"]);
            }
            catch (Exception ex)
            {
                // Keep the raw attribute text in the message so a malformed payload can be diagnosed.
                throw new Exception("page data is:" + rawPageData, ex);
            }
        }
示例#2
0
        /// <summary>
        /// Downloads a community page and collects the link of every listed home.
        /// </summary>
        /// <param name="url">Address of the page to download.</param>
        /// <returns>
        /// The <c>href</c> attribute of each <c>div.info div.title a</c> anchor found inside the
        /// <c>ul.sellListContent li.clear</c> items; an empty list when nothing matches.
        /// </returns>
        public static async Task <List <string> > ReadCommunityHomeURL(string url)
        {
            IDocument page = await WebPageReader.GetPageAsync(url);

            var links       = new List <string>();
            var listEntries = page.QuerySelectorAll("ul.sellListContent li.clear");

            if (listEntries != null)
            {
                foreach (var entry in listEntries)
                {
                    IElement titleAnchor = entry.QuerySelector("div.info div.title a");

                    // Entries without a title anchor contribute nothing.
                    if (titleAnchor == null)
                    {
                        continue;
                    }

                    links.Add(titleAnchor.GetAttribute("href"));
                }
            }

            return links;
        }
        //The URL looks like 'https://xa.ke.com/xiaoqu/pg1/', where the trailing number is the page number.
        public static async Task <List <Community> > ReadCommunityListData(string url)
        {
            IDocument document = await WebPageReader.GetPageAsync(url);

            var communityItemList = document.QuerySelectorAll("ul.listContent li.xiaoquListItem");

            List <Community> communities = new List <Community>();

            foreach (var communityItem in communityItemList)
            {
                string communityName = communityItem.QuerySelector("div.info div.title a").InnerHtml;
                string districtName  = communityItem.QuerySelector("div.info div.positionInfo a.district").InnerHtml;
                string neighborhood  = communityItem.QuerySelector("div.info div.positionInfo a.bizcircle").InnerHtml;
                string listingPrice  = communityItem.QuerySelector("div.xiaoquListItemRight div.xiaoquListItemPrice div.totalPrice span").InnerHtml;
                string listingUnits  = communityItem.QuerySelector("div.xiaoquListItemRight div.xiaoquListItemSellCount a.totalSellCount span").InnerHtml;
                string seashellId    = communityItem.GetAttribute("data-id");
                string seashellURL   = communityItem.QuerySelector("div.info div.title a").GetAttribute("href");

                SeashellContext        context = new SeashellContext();
                AdministrativeDistrict administrativeDistrict = new AdministrativeDistrictRepository(context).GetByName(districtName);
                Community communityToAdd = new Community()
                {
                    CommunityName            = communityName,
                    AdministrativeDistrictId = administrativeDistrict.AdministrativeDistrictId,
                    Neighborhood             = neighborhood,
                    External_id          = seashellId,
                    SeashellURL          = seashellURL,
                    CommunityHistoryInfo = new List <CommunityHistoryInfo>().Append(new CommunityHistoryInfo()
                    {
                        CommunityListingPrice = decimal.TryParse(listingPrice, out decimal price) ? price : 0,
                        CommunityListingUnits = int.TryParse(listingUnits, out int units) ? units : 0,
                        DataTime = DateTime.Now.Date
                    }).ToList()
示例#4
0
        // Fetches a fixed image URL through WebPageReader.GetPageAsync and checks that the
        // returned document exposes non-null source text.
        public async Task TestGetPageAsync()
        {
            const string imageUrl = "https://ke-image.ljcdn.com/hdic-frame/standard_3bb359d7-db9d-46b5-af3b-982efc6bfa13.png!m_fill,w_1000,h_750,l_bk,f_jpg,ls_50?from=ke.com";

            var fetched = await WebPageReader.GetPageAsync(imageUrl);

            Assert.IsNotNull(fetched.Source.Text);
        }
示例#5
0
 // Click handler: reads the page at the address in urlTb.Text via WebPageReader.ReadPage and
 // puts the result into webPageContentTb. Any exception is shown in an error message box.
 private async void readBtn_Click(object sender, EventArgs e)
 {
     try
     {
         string pageContent = await WebPageReader.ReadPage(urlTb.Text);

         webPageContentTb.Text = pageContent;
     }
     catch (Exception failure)
     {
         MessageBox.Show(failure.Message, failure.Source, MessageBoxButtons.OK, MessageBoxIcon.Error);
     }
 }
示例#6
0
        // Checks that WebPageReader.Clone() returns a different object instance than the reader
        // it was called on. Equality of Title/Header/Body between the two is not asserted here.
        public void PrototypeWithoutBuiltIn()
        {
            var original = new WebPageReader("http://www.google.com");

            original.PrintPageInfo();

            var copy = original.Clone();

            Assert.AreNotSame(original, copy);
        }
示例#7
0
 // Click handler: parses the football bets found in the source code of the page at urlTb.Text,
 // then parses the detail page of the first bet. Any exception is shown in an error message box.
 //
 // The handler contains no await, so it is declared as a plain (non-async) method; the
 // previous async modifier had no effect other than a compiler warning.
 private void footballBetclicBtn_Click(object sender, EventArgs e)
 {
     try
     {
         BetclicParser parser = new BetclicParser();
         List <BetParsingInformation> bets = parser.ReadSourceCode(Sport.FOOTBALL, WebPageReader.ReadSourceCode(urlTb.Text));

         // Only the first bet is followed up. If the list is empty, the indexer throws
         // ArgumentOutOfRangeException, which is reported by the catch block below.
         parser.ReadDetailedPageContent(Sport.FOOTBALL, WebPageReader.ReadSourceCode(bets[0].Url), bets[0].Odds);
     }
     catch (Exception ex)
     {
         MessageBox.Show(ex.Message, ex.Source, MessageBoxButtons.OK, MessageBoxIcon.Error);
     }
 }
示例#8
0
 // Click handler: runs the football bet parser over the source code of the page at urlTb.Text.
 // The parsed list is not used afterwards; any exception is shown in an error message box.
 private void betclicDetailFootballBtn_Click(object sender, EventArgs e)
 {
     try
     {
         var betParser = new BetclicParser();

         betParser.ReadSourceCode(Sport.FOOTBALL, WebPageReader.ReadSourceCode(urlTb.Text));
     }
     catch (Exception failure)
     {
         MessageBox.Show(failure.Message, failure.Source, MessageBoxButtons.OK, MessageBoxIcon.Error);
     }
 }
        /// <summary>
        /// Downloads a home detail page and extracts its room layout, building floors, areas and
        /// listing price into a <see cref="Home"/>.
        /// </summary>
        /// <param name="url">Address of the home detail page to download.</param>
        /// <returns>
        /// A <see cref="Home"/> whose bedrooms/bathrooms, total floors and construction area come
        /// from the first three <c>div.base div.content ul li</c> items, whose <c>FloorArea</c> is the
        /// sum of the per-room areas and whose <c>FloorAreaDetail</c> is the JSON text mapping each room
        /// name to its area text. <c>HomeListingPrice</c> is only assigned when a positive total
        /// price was parsed.
        /// </returns>
        /// <exception cref="FormatException">
        /// A required element is missing or its text does not match the expected format.
        /// </exception>
        public static async Task <Home> ReadCommunityHomeDetail(string url)
        {
            // Page numbers are parsed with the invariant culture so the result does not depend
            // on the machine's regional settings.
            System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

            IDocument document = await WebPageReader.GetPageAsync(url);

            IHtmlCollection <IElement> infoListItem = document.QuerySelectorAll("div.base div.content ul li");

            // The first three items are read by position below; fail with a clear message
            // instead of a null-reference or index error when the page has fewer.
            if (infoListItem.Length < 3)
            {
                throw new FormatException("The basic info list is not as expected: found " + infoListItem.Length + " item(s)");
            }

            foreach (IElement item in infoListItem)
            {
                // Drop the leading child node so only the value text remains
                // (presumably that node is the field label - confirm against the page markup).
                if (item.FirstChild != null)
                {
                    item.RemoveChild(item.FirstChild);
                }
            }

            // Layout text such as "3室2厅1卫": group 1 is the bedroom count, group 3 the bathroom count.
            // Capture groups are used so the digits are taken from the match itself rather than
            // from fixed string offsets.
            string roomInfo  = infoListItem[0].InnerHtml;
            Match  roomMatch = Regex.Match(roomInfo, "([0-9])室([0-9])厅([0-9])卫");

            if (!roomMatch.Success)
            {
                throw new FormatException("The format of roomInfo is not as expected:" + roomInfo);
            }

            int bedrooms  = int.Parse(roomMatch.Groups[1].Value, invariant);
            int bathrooms = int.Parse(roomMatch.Groups[3].Value, invariant);

            // Floor text such as "高楼层 (共33层)": only the captured total is used, so unrelated
            // digits elsewhere in the text cannot leak into the number.
            string floorInfo  = infoListItem[1].InnerHtml;
            Match  floorMatch = Regex.Match(floorInfo, "[高中低]楼层 [(]共([0-9]{1,2})层[)]");

            if (!floorMatch.Success)
            {
                throw new FormatException("The format of floorInfo is not as expected:" + floorInfo);
            }

            int totalFloors = int.Parse(floorMatch.Groups[1].Value, invariant);

            // Area text such as "89.5㎡". The decimal point is escaped so that only '.' is accepted
            // as the separator.
            string area      = infoListItem[2].InnerHtml;
            Match  areaMatch = Regex.Match(area, @"^([1-9][0-9]*(?:\.[0-9]{1,2})?)㎡");

            decimal constructionArea;

            if (!areaMatch.Success ||
                !decimal.TryParse(areaMatch.Groups[1].Value, System.Globalization.NumberStyles.Number, invariant, out constructionArea))
            {
                throw new FormatException("The format of area is not as expected:" + area);
            }

            // The price element is optional; when present its text must be a number.
            decimal  totalPrice = 0;
            IElement priceEle   = document.QuerySelector("div.price span.total");

            if (priceEle != null &&
                !decimal.TryParse(priceEle.InnerHtml, System.Globalization.NumberStyles.Number, invariant, out totalPrice))
            {
                throw new FormatException("The format of priceEle is not as expected:" + priceEle.InnerHtml);
            }

            decimal totalFloorArea = 0;
            IHtmlCollection <IElement> roomList = document.QuerySelectorAll("div.layout div#infoList div.row");
            JObject roomsJson   = new JObject();
            Regex   roomAreaReg = new Regex(@"^([1-9][0-9]*(?:\.[0-9]{1,2})?)平米");

            foreach (IElement room in roomList)
            {
                // Each row is read as: first child element = room name, second = area text.
                if (room.Children.Length < 2)
                {
                    throw new FormatException("The format of room area is not as expected:" + room.TextContent);
                }

                string roomName     = room.Children[0].TextContent;
                string roomAreaText = room.Children[1].TextContent;
                Match  roomAreaHit  = roomAreaReg.Match(roomAreaText);

                if (!roomAreaHit.Success)
                {
                    // Report the text that was actually validated.
                    throw new FormatException("The format of room area is not as expected:" + roomAreaText);
                }

                roomsJson.Add(roomName, roomAreaText);
                totalFloorArea += decimal.Parse(roomAreaHit.Groups[1].Value, invariant);
            }

            Home home = new Home()
            {
                BuildingTotalFloors = totalFloors,
                ConstructionArea    = constructionArea,
                FloorArea           = totalFloorArea,
                FloorAreaDetail     = roomsJson.ToString(),
                Bedrooms            = bedrooms,
                Bathrooms           = bathrooms
            };

            // The total price may be missing from the page when the community is under a price
            // restriction, in which case no listing price is recorded.
            if (totalPrice > 0)
            {
                home.HomeListingPrice = new List <HomeListingPrice>
                {
                    new HomeListingPrice()
                    {
                        ListingPrice     = totalPrice,
                        ListingPriceDate = DateTime.Now.Date
                    }
                };
            }

            return home;
        }
    }