/// <summary>
/// Parses the advert listing contained in <paramref name="html"/> into a list of
/// <see cref="RentedApartment"/> objects.
/// </summary>
/// <remarks>
/// Only the fragment located between <c>StartDataFlag</c> and <c>EndDataFlag</c> is examined.
/// When the start flag is absent nothing is parsed; when the end flag is absent everything
/// after the start flag is used. The fragment is split on the advert row marker and every
/// chunk that does not yield a numeric advert id is skipped.
/// </remarks>
/// <param name="siteDomain">Prefix prepended to the owner's URL; also forwarded to <c>GetLinkAndTitle</c>.</param>
/// <param name="html">Raw page markup. May be null or empty, in which case an empty list is returned.</param>
/// <param name="adOwnerLoginNameFilter">
/// Substrings which, when found (ignoring case) in the owner's login name, mark the advert as filtered. May be null.
/// </param>
/// <param name="adDescriptionFilter">
/// Substrings which, when found (ignoring case) in the description, mark the advert as filtered. May be null.
/// </param>
/// <returns>The parsed adverts in the order they appear in the markup; never null.</returns>
public List<RentedApartment> Parse(string siteDomain, string html, string[] adOwnerLoginNameFilter, string[] adDescriptionFilter)
{
    string cHtml = String.Empty;
    if (!String.IsNullOrEmpty(html))
    {
        // IndexOf returns -1 when the flag is missing and 0 when the text starts with it,
        // so ">= 0" is the correct "found" test. Ordinal search: the flags are markup, not prose.
        int startIndex = html.IndexOf(StartDataFlag, StringComparison.Ordinal);
        if (startIndex >= 0)
        {
            cHtml = html.Substring(startIndex + StartDataFlag.Length);
            int endIndex = cHtml.IndexOf(EndDataFlag, StringComparison.Ordinal);
            if (endIndex >= 0)
            {
                cHtml = cHtml.Substring(0, endIndex);
            }
        }
    }

    List<RentedApartment> apartments = new List<RentedApartment>();
    string[] stringSeparators = new string[] { "<tr class=\"advertRow\">" };
    string[] matchAds = cHtml.Split(stringSeparators, StringSplitOptions.RemoveEmptyEntries);

    foreach (string matchAd in matchAds)
    {
        // A chunk without a usable numeric id is not an advert row (or is malformed): skip it
        // instead of letting one bad row abort the whole page.
        string ad_id = ParserHelper.GetDataValue(matchAd, AdIdStartPattern, AdIdEndPattern);
        ulong adId;
        if (String.IsNullOrEmpty(ad_id) || !ulong.TryParse(ad_id, out adId))
        {
            continue;
        }

        RentedApartment apartment = new RentedApartment();
        apartment.AdId = adId;
        apartment.IsFiltered = false;

        string date_create = ParserHelper.GetDataValue(matchAd, AdCreatedStartPattern, AdCreatedEndPattern);
        if (!String.IsNullOrEmpty(date_create))
        {
            apartment.AdCreated = GetDateTime(date_create);
        }

        string price_usd = ParserHelper.GetDataValue(matchAd, PriceStartPattern, PriceEndPattern);
        if (!String.IsNullOrEmpty(price_usd))
        {
            // TryParse leaves 0 in "price" when the text is not a number.
            ulong price;
            ulong.TryParse(price_usd, out price);
            apartment.Price = price;
        }

        // Example of a full owner URL: http://irr.by/user/488351/
        string user_url = ParserHelper.GetDataValue(matchAd, AdOwnerUserUrlStartPattern, AdOwnerUserUrlEndPattern);
        if (!String.IsNullOrEmpty(user_url))
        {
            apartment.AdOwnerUserUrl = siteDomain + user_url;

            // The owner id is the first run of digits in the URL; it is left unset
            // when there is none or when it does not fit into a ulong.
            string ownerIdStr = Regex.Match(user_url, @"\d+").Value;
            ulong ownerId;
            if (!String.IsNullOrEmpty(ownerIdStr) && ulong.TryParse(ownerIdStr, out ownerId))
            {
                apartment.AdOwnerId = ownerId;
            }
        }

        GetLinkAndTitle(siteDomain, apartment, matchAd, LinkStartPattern, LinkEndPattern);

        string user_login = ParserHelper.GetDataValue(matchAd, AdOwnerLoginNameStartPattern, AdOwnerLoginNameEndPattern);
        if (!String.IsNullOrEmpty(user_login))
        {
            if (!apartment.IsFiltered && ContainsAnyFilter(user_login, adOwnerLoginNameFilter))
            {
                apartment.IsFiltered = true;
            }
            apartment.AdOwnerLoginName = user_login;
        }

        string description = ParserHelper.GetDataValue(matchAd, DescriptionStartPattern, DescriptionEndPattern);
        if (!String.IsNullOrEmpty(description))
        {
            if (!apartment.IsFiltered && ContainsAnyFilter(description, adDescriptionFilter))
            {
                apartment.IsFiltered = true;
            }
            apartment.Description = description;
        }

        apartments.Add(apartment);
    }

    return apartments;
}

/// <summary>
/// Tells whether <paramref name="text"/> contains at least one of the <paramref name="filters"/>, ignoring case.
/// </summary>
/// <param name="text">Text to search in; must not be null.</param>
/// <param name="filters">Candidate substrings; may be null. Null and empty entries are ignored.</param>
/// <returns>True when a non-empty filter occurs in the text; otherwise false.</returns>
private static bool ContainsAnyFilter(string text, string[] filters)
{
    if (filters == null)
    {
        return false;
    }

    foreach (string filter in filters)
    {
        // An empty filter would match every text (IndexOf returns 0) and a null one would throw.
        if (String.IsNullOrEmpty(filter))
        {
            continue;
        }

        if (text.IndexOf(filter, StringComparison.InvariantCultureIgnoreCase) >= 0)
        {
            return true;
        }
    }

    return false;
}
/// <summary>
/// Downloads one listing page from <c>SiteDomain</c> and parses it into a
/// <see cref="RentedApartmentCollection"/>.
/// </summary>
/// <remarks>
/// The request URL is <c>SiteDomain</c> followed by the <c>Uri</c> format string filled with
/// <paramref name="priceUpperLimit"/> (argument 0) and <paramref name="itemsOnPage"/> (argument 1).
/// An <see cref="IOException"/> raised while reading the response body is logged and the page is
/// then treated as empty. Exceptions thrown while creating or sending the request are not caught here.
/// </remarks>
/// <param name="itemsOnPage">Value substituted for placeholder {1} of the <c>Uri</c> format string.</param>
/// <param name="priceUpperLimit">Value substituted for placeholder {0} of the <c>Uri</c> format string.</param>
/// <returns>
/// A collection carrying the parsed adverts and <c>SiteDomain</c>; the advert list is empty when
/// the response could not be read.
/// </returns>
public RentedApartmentCollection GetData(int itemsOnPage, int priceUpperLimit)
{
    // Invariant culture keeps the numbers in the URL independent of the machine's regional settings.
    string queryString = SiteDomain + String.Format(System.Globalization.CultureInfo.InvariantCulture, Uri, priceUpperLimit, itemsOnPage);
    List<RentedApartment> apartments = new List<RentedApartment>();

    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(queryString);
    if (UseProxy)
    {
        // The default proxy may be absent; only configure it when there is one.
        IWebProxy proxy = HttpWebRequest.DefaultWebProxy;
        if (proxy != null)
        {
            proxy.Credentials = CredentialCache.DefaultNetworkCredentials;
            request.Proxy = proxy;
        }
        request.PreAuthenticate = true;
    }

    request.UseDefaultCredentials = true;
    request.Accept = "application/json, text/javascript, */*; q=0.01";
    request.UserAgent = "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:22.0) Gecko/20100101 Firefox/22.0";
    request.Headers["Accept-Language"] = "ru-RU,ru;q=0.8,en-US;q=0.5,en;q=0.3";

    // The using statements release the response and its stream on every path,
    // so no explicit Close calls are needed.
    using (WebResponse response = request.GetResponse())
    using (Stream dataStream = response.GetResponseStream())
    {
        if (dataStream != null && dataStream.CanRead)
        {
            string html = String.Empty;
            try
            {
                using (StreamReader sr = new StreamReader(dataStream))
                {
                    html = sr.ReadToEnd();
                }
            }
            catch (IOException ex)
            {
                Logging.Log.Error(String.Format("Сайт {0} недоступен. Попробуйте позже", SiteDomain), ex);
            }

            // Filters are ';'-separated lists. Empty entries (e.g. from a trailing ';') are dropped
            // because an empty substring is found in every text and would filter out every advert.
            char[] filterSeparators = new char[] { ';' };
            string[] adOwnerLoginNameFilterArr = null;
            string[] adDescriptionFilterArr = null;
            if (!String.IsNullOrEmpty(AdOwnerLoginNameFilter))
            {
                adOwnerLoginNameFilterArr = AdOwnerLoginNameFilter.Split(filterSeparators, StringSplitOptions.RemoveEmptyEntries);
            }
            if (!String.IsNullOrEmpty(AdDescriptionFilter))
            {
                adDescriptionFilterArr = AdDescriptionFilter.Split(filterSeparators, StringSplitOptions.RemoveEmptyEntries);
            }

            IrrByRentParser parser = new IrrByRentParser();
            apartments = parser.Parse(SiteDomain, html, adOwnerLoginNameFilterArr, adDescriptionFilterArr);
        }
    }

    RentedApartmentCollection collection = new RentedApartmentCollection
    {
        apartments = apartments,
        SiteDomain = SiteDomain
    };
    return collection;
}