/// <summary>
/// Reads the total number of result pages advertised by a community's home list page.
/// </summary>
/// <param name="url">Address of the home list page to download.</param>
/// <returns>
/// The <c>totalPage</c> value found in the JSON stored in the pager's <c>page-data</c> attribute,
/// or 1 when the page contains no <c>div.house-lst-page-box</c> element.
/// </returns>
/// <exception cref="Exception">
/// Thrown when the page data cannot be parsed or has no <c>totalPage</c> entry. The message
/// carries the raw page data and the inner exception carries the underlying failure.
/// </exception>
public static async Task<int> ReadCommunityHomeListPageNumber(string url)
{
    IDocument document = await WebPageReader.GetPageAsync(url);

    var pager = document.QuerySelector("div.house-lst-page-box");
    if (pager == null)
    {
        // No pager element on the page: report a single page.
        return 1;
    }

    var pageData = pager.GetAttribute("page-data");
    try
    {
        JObject pagerJson = JObject.Parse(pageData);
        var totalPageToken = pagerJson["totalPage"];
        if (totalPageToken == null)
        {
            // Convert.ToInt32(null) yields 0, which would silently report "no pages".
            // Fail through the same wrapped-exception path as any other malformed page data.
            throw new FormatException("The 'totalPage' field is missing.");
        }

        return Convert.ToInt32(totalPageToken);
    }
    catch (Exception ex)
    {
        throw new Exception("page data is:" + pageData, ex);
    }
}
/// <summary>
/// Downloads a home list page and gathers the <c>href</c> of every listing's title link.
/// </summary>
/// <param name="url">Address of the home list page to download.</param>
/// <returns>
/// The <c>href</c> attribute values of the title anchors, in document order.
/// The list is empty when no listing items are found.
/// </returns>
public static async Task<List<string>> ReadCommunityHomeURL(string url)
{
    IDocument page = await WebPageReader.GetPageAsync(url);
    List<string> links = new List<string>();

    var listingItems = page.QuerySelectorAll("ul.sellListContent li.clear");
    if (listingItems != null)
    {
        foreach (var listing in listingItems)
        {
            IElement titleAnchor = listing.QuerySelector("div.info div.title a");
            if (titleAnchor == null)
            {
                // Listing item without a title link: nothing to collect.
                continue;
            }

            links.Add(titleAnchor.GetAttribute("href"));
        }
    }

    return links;
}
//The url will be like 'https://xa.ke.com/xiaoqu/pg1/' which the number is the page number public static async Task <List <Community> > ReadCommunityListData(string url) { IDocument document = await WebPageReader.GetPageAsync(url); var communityItemList = document.QuerySelectorAll("ul.listContent li.xiaoquListItem"); List <Community> communities = new List <Community>(); foreach (var communityItem in communityItemList) { string communityName = communityItem.QuerySelector("div.info div.title a").InnerHtml; string districtName = communityItem.QuerySelector("div.info div.positionInfo a.district").InnerHtml; string neighborhood = communityItem.QuerySelector("div.info div.positionInfo a.bizcircle").InnerHtml; string listingPrice = communityItem.QuerySelector("div.xiaoquListItemRight div.xiaoquListItemPrice div.totalPrice span").InnerHtml; string listingUnits = communityItem.QuerySelector("div.xiaoquListItemRight div.xiaoquListItemSellCount a.totalSellCount span").InnerHtml; string seashellId = communityItem.GetAttribute("data-id"); string seashellURL = communityItem.QuerySelector("div.info div.title a").GetAttribute("href"); SeashellContext context = new SeashellContext(); AdministrativeDistrict administrativeDistrict = new AdministrativeDistrictRepository(context).GetByName(districtName); Community communityToAdd = new Community() { CommunityName = communityName, AdministrativeDistrictId = administrativeDistrict.AdministrativeDistrictId, Neighborhood = neighborhood, External_id = seashellId, SeashellURL = seashellURL, CommunityHistoryInfo = new List <CommunityHistoryInfo>().Append(new CommunityHistoryInfo() { CommunityListingPrice = decimal.TryParse(listingPrice, out decimal price) ? price : 0, CommunityListingUnits = int.TryParse(listingUnits, out int units) ? units : 0, DataTime = DateTime.Now.Date }).ToList()
/// <summary>
/// Fetches a fixed image URL through <c>WebPageReader.GetPageAsync</c> and checks that the
/// returned document exposes non-null source text.
/// </summary>
public async Task TestGetPageAsync()
{
    const string imageUrl = "https://ke-image.ljcdn.com/hdic-frame/standard_3bb359d7-db9d-46b5-af3b-982efc6bfa13.png!m_fill,w_1000,h_750,l_bk,f_jpg,ls_50?from=ke.com";

    var page = await WebPageReader.GetPageAsync(imageUrl);
    string sourceText = page.Source.Text;

    Assert.IsNotNull(sourceText);
}
/// <summary>
/// Click handler: reads the page at the URL typed into <c>urlTb</c> and shows its content
/// in <c>webPageContentTb</c>. Any failure is reported in an error message box.
/// </summary>
private async void readBtn_Click(object sender, EventArgs e)
{
    try
    {
        webPageContentTb.Text = await WebPageReader.ReadPage(urlTb.Text);
    }
    catch (Exception failure)
    {
        MessageBox.Show(failure.Message, failure.Source, MessageBoxButtons.OK, MessageBoxIcon.Error);
    }
}
/// <summary>
/// Clones a <c>WebPageReader</c> and verifies that the clone is a distinct object instance.
/// </summary>
public void PrototypeWithoutBuiltIn()
{
    const string webUrl = "http://www.google.com";

    var original = new WebPageReader(webUrl);
    original.PrintPageInfo();

    var copy = original.Clone();

    // Equality checks on Title, Header and Body are currently disabled:
    //Assert.Equals(copy.Title, original.Title);
    //Assert.Equals(copy.Header, original.Header);
    //Assert.Equals(copy.Body, original.Body);
    Assert.AreNotSame(original, copy);
}
/// <summary>
/// Click handler: parses the football bet list from the source of the URL typed into
/// <c>urlTb</c>, then parses the detail page of the first bet found.
/// Any failure is reported in an error message box.
/// </summary>
/// <remarks>
/// The handler contains no awaits, so it is deliberately not declared <c>async</c>.
/// </remarks>
private void footballBetclicBtn_Click(object sender, EventArgs e)
{
    try
    {
        BetclicParser parser = new BetclicParser();
        List<BetParsingInformation> bets = parser.ReadSourceCode(Sport.FOOTBALL, WebPageReader.ReadSourceCode(urlTb.Text));

        // Without this guard an empty result surfaces as an opaque index-out-of-range error.
        if (bets == null || bets.Count == 0)
        {
            MessageBox.Show("No bets were found on the page.", "Betclic", MessageBoxButtons.OK, MessageBoxIcon.Warning);
            return;
        }

        var firstBet = bets[0];
        parser.ReadDetailedPageContent(Sport.FOOTBALL, WebPageReader.ReadSourceCode(firstBet.Url), firstBet.Odds);
    }
    catch (Exception ex)
    {
        MessageBox.Show(ex.Message, ex.Source, MessageBoxButtons.OK, MessageBoxIcon.Error);
    }
}
/// <summary>
/// Click handler: runs the Betclic football parser over the source of the URL typed into
/// <c>urlTb</c>. The parsed list is not used further here. Any failure is reported in an
/// error message box.
/// </summary>
private void betclicDetailFootballBtn_Click(object sender, EventArgs e)
{
    try
    {
        var betclic = new BetclicParser();
        var pageSource = WebPageReader.ReadSourceCode(urlTb.Text);
        List<BetParsingInformation> parsedBets = betclic.ReadSourceCode(Sport.FOOTBALL, pageSource);
    }
    catch (Exception failure)
    {
        MessageBox.Show(failure.Message, failure.Source, MessageBoxButtons.OK, MessageBoxIcon.Error);
    }
}
/// <summary>
/// Downloads a home detail page and extracts its room layout, total floor count,
/// construction area, per-room floor areas and (when present) the total listing price.
/// </summary>
/// <param name="url">Address of the home detail page to download.</param>
/// <returns>
/// A <see cref="Home"/> populated from the page. <c>HomeListingPrice</c> is only set when a
/// total price greater than zero was found.
/// </returns>
/// <exception cref="Exception">
/// Thrown when the base-info list has fewer than three entries, or when the room layout,
/// floor, area, price or a room-area text does not have the expected format.
/// </exception>
public static async Task<Home> ReadCommunityHomeDetail(string url)
{
    IDocument document = await WebPageReader.GetPageAsync(url);

    // The page carries machine-formatted numbers ("89.5"), so parsing must not depend on the
    // current culture (under some cultures '.' is a group separator and "89.5" becomes 895).
    var invariant = System.Globalization.CultureInfo.InvariantCulture;
    var numberStyle = System.Globalization.NumberStyles.Number;

    IHtmlCollection<IElement> infoListItem = document.QuerySelectorAll("div.base div.content ul li");
    foreach (IElement item in infoListItem)
    {
        // Drop the leading child node of each entry so only the remaining content is parsed
        // (presumably the leading node is the field label - TODO confirm against the page).
        if (item.FirstChild != null)
        {
            item.RemoveChild(item.FirstChild);
        }
    }

    if (infoListItem.Length < 3)
    {
        throw new Exception("The home info list is shorter than expected:" + infoListItem.Length);
    }

    // Room layout. Values are taken from the regex captures instead of fixed string offsets,
    // so surrounding whitespace or markup cannot shift the digits being read.
    Regex roomInfoReg = new Regex("([0-9])室[0-9]厅([0-9])卫");
    string roomInfo = infoListItem[0].InnerHtml;
    Match roomInfoMatch = roomInfoReg.Match(roomInfo);
    if (!roomInfoMatch.Success)
    {
        throw new Exception("The format of roomInfo is not as expected:" + roomInfo);
    }

    int bedrooms = int.Parse(roomInfoMatch.Groups[1].Value, invariant);
    int bathrooms = int.Parse(roomInfoMatch.Groups[2].Value, invariant);

    // Total floors. Only the captured number is used; stripping every non-digit from the whole
    // text would glue unrelated digits together.
    Regex floorInfoReg = new Regex("[高中低]楼层 [(]共([0-9]{1,2})层[)]");
    string floorInfo = infoListItem[1].InnerHtml;
    Match floorInfoMatch = floorInfoReg.Match(floorInfo);
    if (!floorInfoMatch.Success)
    {
        throw new Exception("The format of floorInfo is not as expected:" + floorInfo);
    }

    int totalFloors = int.Parse(floorInfoMatch.Groups[1].Value, invariant);

    // Construction area. The decimal point is escaped (a bare '.' matched any character) and
    // the nested quantifier is removed; the captured number is parsed strictly.
    Regex areaReg = new Regex(@"^([1-9][0-9]*(?:\.[0-9]{0,2})?)㎡");
    string area = infoListItem[2].InnerHtml;
    Match areaMatch = areaReg.Match(area);
    if (!areaMatch.Success)
    {
        throw new Exception("The format of area is not as expected:" + area);
    }

    decimal constructionArea = decimal.Parse(areaMatch.Groups[1].Value, numberStyle, invariant);

    // Total price. A missing price element is tolerated; an unparsable one is an error.
    decimal totalPrice = 0;
    IElement priceEle = document.QuerySelector("div.price span.total");
    if (priceEle != null && !decimal.TryParse(priceEle.InnerHtml, numberStyle, invariant, out totalPrice))
    {
        throw new Exception("The format of priceEle is not as expected:" + priceEle.InnerHtml);
    }

    // Per-room areas: the first child element's text is stored as the JSON key and the second
    // child's text as the value; the numeric part of the value is added to the total.
    decimal totalFloorArea = 0;
    IHtmlCollection<IElement> roomList = document.QuerySelectorAll("div.layout div#infoList div.row");
    JObject roomsJson = new JObject();
    Regex roomAreaReg = new Regex(@"^([1-9][0-9]*(?:\.[0-9]{0,2})?)平米");
    foreach (IElement room in roomList)
    {
        string roomAreaText = room.Children[1].TextContent;
        Match roomAreaMatch = roomAreaReg.Match(roomAreaText);
        if (!roomAreaMatch.Success)
        {
            // Report the exact text that failed validation.
            throw new Exception("The format of room area is not as expected:" + roomAreaText);
        }

        roomsJson.Add(room.Children[0].TextContent, roomAreaText);
        totalFloorArea += decimal.Parse(roomAreaMatch.Groups[1].Value, numberStyle, invariant);
    }

    string floorAreaDetail = roomsJson.ToString();

    Home home = new Home()
    {
        BuildingTotalFloors = totalFloors,
        ConstructionArea = constructionArea,
        FloorArea = totalFloorArea,
        FloorAreaDetail = floorAreaDetail,
        Bedrooms = bedrooms,
        Bathrooms = bathrooms
    };

    // The total price cannot be found on the page when the community is under a price
    // restriction, so a listing price entry is only recorded when a price was read.
    if (totalPrice > 0)
    {
        home.HomeListingPrice = new List<HomeListingPrice>
        {
            new HomeListingPrice()
            {
                ListingPrice = totalPrice,
                ListingPriceDate = DateTime.Now.Date
            }
        };
    }

    return home;
}
}