/// <summary>
/// Extracts the ad rows from a listing page and converts each one into a
/// <see cref="RentedApartment"/>.
/// </summary>
/// <remarks>
/// Only the part of <paramref name="html"/> after <c>StartDataFlag</c> and, when present,
/// before <c>EndDataFlag</c> is parsed. That section is split on the
/// <c>&lt;tr class="advertRow"&gt;</c> row marker. Chunks without a numeric ad id are skipped.
/// </remarks>
/// <param name="siteDomain">Prefix prepended to the owner URL and the ad URL found in the markup.</param>
/// <param name="html">Raw page markup; null or empty yields an empty list.</param>
/// <param name="adOwnerLoginNameFilter">
/// Substrings matched case-insensitively against the owner login name; a match marks the ad
/// as filtered. May be null.
/// </param>
/// <param name="adDescriptionFilter">
/// Substrings matched case-insensitively against the description; a match marks the ad as
/// filtered. May be null.
/// </param>
/// <returns>The parsed ads, including the ones marked as filtered; never null.</returns>
public List<RentedApartment> Parse(string siteDomain, string html, string[] adOwnerLoginNameFilter, string[] adDescriptionFilter)
{
    string dataHtml = String.Empty;

    if (!String.IsNullOrEmpty(html))
    {
        int startIndex = html.IndexOf(StartDataFlag);

        // IndexOf reports "not found" as -1; index 0 is a legitimate match at the very start.
        if (startIndex >= 0)
        {
            dataHtml = html.Remove(0, startIndex + StartDataFlag.Length);

            int endIndex = dataHtml.IndexOf(EndDataFlag);
            if (endIndex >= 0)
            {
                dataHtml = dataHtml.Remove(endIndex, dataHtml.Length - endIndex);
            }
        }
    }

    List<RentedApartment> apartments = new List<RentedApartment>();

    string[] rowSeparators = new string[] { "<tr class=\"advertRow\">" };
    string[] adChunks = dataHtml.Split(rowSeparators, StringSplitOptions.RemoveEmptyEntries);

    foreach (string adChunk in adChunks)
    {
        // An ad without a usable numeric id is skipped. TryParse keeps one malformed row
        // from aborting the whole page with a FormatException/OverflowException.
        string adIdText = ParserHelper.GetDataValue(adChunk, AdIdStartPattern, AdIdEndPattern);
        ulong adId;
        if (String.IsNullOrEmpty(adIdText) || !ulong.TryParse(adIdText, out adId))
        {
            continue;
        }

        RentedApartment apartment = new RentedApartment();
        apartment.AdId = adId;
        apartment.IsFiltered = false;

        string createdText = ParserHelper.GetDataValue(adChunk, AdCreatedStartPattern, AdCreatedEndPattern);
        if (!String.IsNullOrEmpty(createdText))
        {
            apartment.AdCreated = GetDateTime(createdText);
        }

        string priceText = ParserHelper.GetDataValue(adChunk, PriceStartPattern, PriceEndPattern);
        if (!String.IsNullOrEmpty(priceText))
        {
            // An unparsable price is stored as 0.
            ulong price = 0;
            ulong.TryParse(priceText, out price);
            apartment.Price = price;
        }

        // Resulting owner URL looks like http://irr.by/user/488351/
        string ownerUrl = ParserHelper.GetDataValue(adChunk, AdOwnerUserUrlStartPattern, AdOwnerUserUrlEndPattern);
        if (!String.IsNullOrEmpty(ownerUrl))
        {
            apartment.AdOwnerUserUrl = siteDomain + ownerUrl;

            // The owner id is the first run of digits in the URL. It is left unset when the
            // digits do not fit into a ulong instead of throwing.
            string ownerIdText = Regex.Match(ownerUrl, @"\d+").Value;
            ulong ownerId;
            if (!String.IsNullOrEmpty(ownerIdText) && ulong.TryParse(ownerIdText, out ownerId))
            {
                apartment.AdOwnerId = ownerId;
            }
        }

        GetLinkAndTitle(siteDomain, apartment, adChunk, LinkStartPattern, LinkEndPattern);

        string ownerLogin = ParserHelper.GetDataValue(adChunk, AdOwnerLoginNameStartPattern, AdOwnerLoginNameEndPattern);
        if (!String.IsNullOrEmpty(ownerLogin))
        {
            if (!apartment.IsFiltered && ContainsAnyFilterText(ownerLogin, adOwnerLoginNameFilter))
            {
                apartment.IsFiltered = true;
            }

            apartment.AdOwnerLoginName = ownerLogin;
        }

        string description = ParserHelper.GetDataValue(adChunk, DescriptionStartPattern, DescriptionEndPattern);
        if (!String.IsNullOrEmpty(description))
        {
            // Skipped when the login filter has already flagged the ad.
            if (!apartment.IsFiltered && ContainsAnyFilterText(description, adDescriptionFilter))
            {
                apartment.IsFiltered = true;
            }

            apartment.Description = description;
        }

        apartments.Add(apartment);
    }

    return apartments;
}

/// <summary>
/// Tells whether <paramref name="text"/> contains at least one of the filter strings,
/// compared case-insensitively with the invariant culture.
/// </summary>
/// <param name="text">Non-null text to search in.</param>
/// <param name="filters">Filter substrings; null means "no filters" and yields false.</param>
/// <returns>True as soon as one filter string is found in <paramref name="text"/>.</returns>
private static bool ContainsAnyFilterText(string text, string[] filters)
{
    if (filters == null)
    {
        return false;
    }

    foreach (string filter in filters)
    {
        if (text.IndexOf(filter, 0, StringComparison.InvariantCultureIgnoreCase) >= 0)
        {
            return true;
        }
    }

    return false;
}
/// <summary>
/// Reads an ad link and its title from the markup that follows <paramref name="startPattern"/>
/// and stores them in <paramref name="apartment"/>.
/// </summary>
/// <remarks>
/// The link is every character after the start pattern up to the first double quote. The title
/// is everything after that quote, with '&gt;' characters dropped. Scanning stops at the first
/// <paramref name="endPattern"/> character or at the end of the text. Nothing is assigned when
/// <paramref name="source"/> is null or empty, <paramref name="apartment"/> is null, or the
/// start pattern is not found.
/// </remarks>
/// <param name="siteDomain">Prefix prepended to the extracted link to form <c>AdUrl</c>.</param>
/// <param name="apartment">Ad object whose <c>Title</c> and <c>AdUrl</c> are assigned.</param>
/// <param name="source">Markup of a single ad.</param>
/// <param name="startPattern">Text that immediately precedes the link.</param>
/// <param name="endPattern">Character that terminates the scan.</param>
private static void GetLinkAndTitle(string siteDomain, RentedApartment apartment, string source, string startPattern, char endPattern)
{
    if (apartment == null || String.IsNullOrEmpty(source))
    {
        return;
    }

    int patternPos = source.IndexOf(startPattern);
    if (patternPos < 0)
    {
        return;
    }

    string tail = source.Substring(patternPos + startPattern.Length);
    StringBuilder link = new StringBuilder();
    StringBuilder title = new StringBuilder();

    // Phase 1: the link runs up to the closing quote (or the terminator, whichever comes first).
    int pos = 0;
    while (pos < tail.Length && tail[pos] != endPattern && tail[pos] != '"')
    {
        link.Append(tail[pos]);
        pos++;
    }

    // Phase 2: only entered when phase 1 stopped on the quote; the quote itself is skipped.
    if (pos < tail.Length && tail[pos] != endPattern)
    {
        for (pos++; pos < tail.Length && tail[pos] != endPattern; pos++)
        {
            char current = tail[pos];
            if (current != '>')
            {
                title.Append(current);
            }
        }
    }

    apartment.Title = title.ToString();
    apartment.AdUrl = siteDomain + link.ToString();
}