/// <summary>
/// Inserts a batch of houses into the table selected by the first item's <c>Source</c>,
/// then hands the same batch to <c>elasticsearchService.SaveHousesToES</c>.
/// </summary>
/// <param name="list">
/// Houses to persist. A <c>null</c> or empty list is a no-op. Every row is written to the
/// single table resolved from the first element's <c>Source</c>.
/// </param>
public void BulkInsertHouses(List<BaseHouseInfo> list)
{
    if (list == null || list.Count == 0)
    {
        return;
    }

    // The table name cannot be bound as a SQL parameter, so it is concatenated into the
    // statement. NOTE(review): assumes GetTableName only returns trusted, configured names.
    var tableName = ConstConfigName.GetTableName(list[0].Source);
    var insertSql = "INSERT INTO " + tableName +
        " (`HouseTitle`, `HouseOnlineURL`, `HouseLocation`, `DisPlayPrice`, `PubTime`, `HousePrice`, `LocationCityName`, `Source`, `HouseText`, `IsAnalyzed`, `Status`,`PicURLs`)" +
        " VALUES (@HouseTitle, @HouseOnlineURL, @HouseLocation, @DisPlayPrice, @PubTime, @HousePrice, @LocationCityName, @Source, @HouseText, @IsAnalyzed, @Status,@PicURLs)" +
        " ON DUPLICATE KEY UPDATE DataChange_LastTime=now();";

    using (IDbConnection dbConnection = GetConnection())
    {
        dbConnection.Open();

        // Dispose the transaction deterministically: if Execute throws before Commit,
        // the uncommitted transaction is released here instead of lingering until the
        // connection itself is torn down.
        using (IDbTransaction transaction = dbConnection.BeginTransaction())
        {
            dbConnection.Execute(insertSql, list, transaction: transaction);
            transaction.Commit();
        }
    }

    // Runs only after the database work has completed without throwing.
    // NOTE(review): the helper name suggests failures here are logged rather than
    // rethrown - confirm against LogHelper.
    LogHelper.RunActionTaskNotThrowEx(() =>
    {
        elasticsearchService.SaveHousesToES(list);
    }, "SaveHousesToES");
}
/// <summary>
/// Crawls pages 1 through 100 of the lianjia.com "ershoufang" listing for every city in
/// <c>configuration.CityList</c>, parses each listing into a <c>HouseInfo</c>, and saves
/// each page's batch through <c>elasticsearchService.SaveHousesToES</c>.
/// </summary>
/// <remarks>
/// Parsing is tolerant: a listing without a title link is skipped, and any other field
/// whose markup is missing or unparsable is left at its default value instead of
/// aborting the whole crawl.
/// </remarks>
public void Run()
{
    foreach (var city in configuration.CityList)
    {
        for (var page = 1; page <= 100; page++)
        {
            var houseUrl = $"https://{city.Code}.lianjia.com/ershoufang/pg{page}/";
            var houseHTML = GetHTML(houseUrl);
            var htmlDoc = htmlParser.Parse(houseHTML);
            var houseUL = htmlDoc.QuerySelector("ul.sellListContent");
            if (houseUL == null)
            {
                continue;
            }

            var houses = new List<HouseInfo>();
            foreach (var item in houseUL.QuerySelectorAll("li.clear"))
            {
                // The title link carries both the title and the URL; without it the
                // listing cannot be identified, so it is skipped.
                var titleLink = item.QuerySelector("div.title")?.QuerySelector("a");
                if (titleLink == null)
                {
                    continue;
                }

                var house = new HouseInfo();
                house.HouseTitle = titleLink.TextContent;
                house.OnlineURL = titleLink.GetAttribute("href");

                var houseInfo = item.QuerySelector("div.houseInfo");
                if (houseInfo != null)
                {
                    // The info line is a "/"-separated list of attributes.
                    var infoParts = houseInfo.TextContent.Split("/");

                    var addressLink = houseInfo.QuerySelector("a");
                    if (addressLink != null)
                    {
                        house.Address = addressLink.TextContent;
                    }

                    house.HouseType = infoParts.FirstOrDefault(text => text.Contains("室"));

                    var areaText = infoParts.FirstOrDefault(text => text.Contains("平米"));
                    decimal area;
                    if (areaText != null && TryParseInvariantDecimal(areaText.Replace("平米", ""), out area))
                    {
                        house.Area = area;
                    }
                }

                var timeText = item.QuerySelector("div.timeInfo")?.TextContent;
                house.PubTime = DateTime.Now.AddDays(-ParsePublishedDaysAgo(timeText));

                var totalPriceText = item.QuerySelector("div.totalPrice")?.QuerySelector("span")?.TextContent;
                decimal totalPrice;
                if (TryParseInvariantDecimal(totalPriceText, out totalPrice))
                {
                    house.TotalPrice = totalPrice;
                }

                var unitPriceText = item.QuerySelector("div.unitPrice")?.GetAttribute("data-price");
                decimal unitPrice;
                if (TryParseInvariantDecimal(unitPriceText, out unitPrice))
                {
                    house.UnitPrice = unitPrice;
                }

                house.Pictures = new List<string>();
                var image = item.QuerySelector("img.lj-lazy");
                if (image != null)
                {
                    house.Pictures.Add(image.GetAttribute("src"));
                }

                house.HouseText = item.InnerHtml;
                house.CityName = city.Name;
                houses.Add(house);
            }

            // Nothing parsed on this page: skip the save rather than sending an empty batch.
            if (houses.Count > 0)
            {
                elasticsearchService.SaveHousesToES(houses);
            }
        }
    }
}

/// <summary>
/// Converts a relative publish label ("N天以前发布" or "N个月以前发布") into a day count,
/// counting a month as 30 days. Returns 0 for null, unrecognized or unparsable text.
/// </summary>
private static int ParsePublishedDaysAgo(string timeText)
{
    if (string.IsNullOrWhiteSpace(timeText))
    {
        return 0;
    }

    string countText;
    int daysPerUnit;
    if (timeText.Contains("天"))
    {
        countText = timeText.Replace("天以前发布", "");
        daysPerUnit = 1;
    }
    else if (timeText.Contains("月"))
    {
        countText = timeText.Replace("个月以前发布", "");
        daysPerUnit = 30;
    }
    else
    {
        return 0;
    }

    int count;
    return int.TryParse(
        countText,
        System.Globalization.NumberStyles.Integer,
        System.Globalization.CultureInfo.InvariantCulture,
        out count)
        ? count * daysPerUnit
        : 0;
}

/// <summary>
/// Parses a scraped number using the invariant culture so the result does not depend on
/// the machine's regional settings. Returns false for null or malformed text.
/// </summary>
private static bool TryParseInvariantDecimal(string text, out decimal value)
{
    return decimal.TryParse(
        text,
        System.Globalization.NumberStyles.Number,
        System.Globalization.CultureInfo.InvariantCulture,
        out value);
}