/// <summary>
/// Matches a script line of the form <c>var name = "value"</c>, capturing the variable
/// name in group 1 and the quoted value in group 2. Built once and shared, because a
/// compiled regex is costly to construct.
/// </summary>
private static readonly Regex PositionVariableRegex = new Regex(
    "var\\s(\\w*)\\s=.*(?:\\\"(.*)\\\").*",
    RegexOptions.IgnoreCase | RegexOptions.CultureInvariant | RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled);

/// <summary>
/// Downloads the page referenced by <c>ad["Url"]</c>, scrapes the ad details from it and
/// appends them as <c>{ "Type": ..., "Value": ... }</c> objects to the <c>ad["Contents"]</c> array.
/// </summary>
/// <remarks>
/// Entries are appended in this order: new <c>PictureUrl</c> values (pictures already listed
/// under <c>Contents</c> are skipped), <c>AllowCommercial</c>, <c>Name</c>, <c>ContactUrl</c>,
/// one <c>Param</c> per parameter row, <c>Description</c>, and finally <c>Latitude</c> and
/// <c>Longitude</c>, which are written only when the position script declares
/// <c>source = "address"</c>.
/// </remarks>
/// <param name="ad">
/// The ad to enrich. It must carry a <c>Url</c> value and a <c>Contents</c> array.
/// </param>
/// <returns>
/// The same <see cref="JObject"/> instance that was passed in, with the extracted contents appended.
/// </returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="ad"/> is <c>null</c>.
/// </exception>
/// <exception cref="InvalidOperationException">
/// The downloaded page does not contain the <c>content-border</c> container.
/// </exception>
public static JObject ExtractAllAdInformation(JObject ad)
{
    if (ad == null)
    {
        throw new ArgumentNullException("ad");
    }

    var web = new HtmlWeb { OverrideEncoding = System.Text.Encoding.GetEncoding(Encoding) };
    var pageUrl = PrefixProtocolRelativeUrl(ad["Url"].ToString());
    var doc = web.Load(pageUrl);

    var content = doc.DocumentNode.SelectSingleNode("//div[@class='content-border']");
    if (content == null)
    {
        // Fail with an explicit message instead of a NullReferenceException further down.
        throw new InvalidOperationException("No 'content-border' container found in page " + pageUrl);
    }

    // Pictures: prefer the thumbnail carousel, otherwise fall back to the single main picture.
    var pictures = new List<string>();
    var thumbnails = content.SelectNodes("//div[@id='thumbs_carousel']//span[@class='thumbs']");
    if (thumbnails != null)
    {
        foreach (var thumbnail in thumbnails)
        {
            // The URL lives in the inline style; switching "thumbs" to "images" points at the full-size file.
            var pictureUrl = thumbnail.GetAttributeValue("style", string.Empty)
                .Replace("background-image: url('", string.Empty)
                .Replace("thumbs", "images")
                .Replace("');", string.Empty);
            pictures.Add(PrefixProtocolRelativeUrl(pictureUrl));
        }
    }
    else
    {
        var mainPicture = content.SelectSingleNode("//div[@class='images_cadre']/a");
        if (mainPicture != null)
        {
            var pictureUrl = mainPicture.GetAttributeValue("style", string.Empty)
                .Replace("background-image: url('", string.Empty)
                .Replace("');", string.Empty);
            pictures.Add(PrefixProtocolRelativeUrl(pictureUrl));
        }
    }

    var commercialNode = content.SelectSingleNode("//div[@class='lbc_nosalesmen']");
    var nameNode = content.SelectSingleNode("//div[@class='upload_by']/a");
    var emailNode = content.SelectSingleNode("//div[@class='lbc_links']/a[@class='sendMail']");

    // Parameters: one "title: value" string per table row that has a header cell.
    var parameters = new List<string>();
    var parameterRows = content.SelectNodes(
        "//div[contains(@class, 'lbcParamsContainer')]/div[contains(@class, 'lbcParams')]//tr");
    if (parameterRows != null)
    {
        foreach (var row in parameterRows)
        {
            var headerCell = row.SelectSingleNode("th");
            if (headerCell == null)
            {
                continue;
            }

            var title = headerCell.InnerText.Replace(":", string.Empty).Trim();

            // The value is taken from a nested span, then a nested link, then the bare cell.
            var valueNode = row.SelectSingleNode("td//span")
                            ?? row.SelectSingleNode("td//a")
                            ?? row.SelectSingleNode("td");
            var value = valueNode != null ? valueNode.InnerText : string.Empty;

            parameters.Add(title + ": " + value);
        }
    }

    // Position: read the lat / lng / source variables declared in the inline script.
    var positionScript = content.SelectSingleNode("//div[@class='colRight']/script");
    string latitude = null;
    string longitude = null;
    var skipCoordinates = true;
    if (positionScript != null)
    {
        var scriptLines = positionScript.InnerText.Split(new[] { '\n' }, StringSplitOptions.RemoveEmptyEntries);
        foreach (var scriptLine in scriptLines)
        {
            var match = PositionVariableRegex.Match(scriptLine);
            if (!match.Success)
            {
                continue;
            }

            var variableValue = match.Groups[2].Value.Trim();
            switch (match.Groups[1].Value)
            {
                case "lat":
                    latitude = variableValue;
                    break;
                case "lng":
                    longitude = variableValue;
                    break;
                case "source":
                    // Coordinates are only kept when the script says they come from an address.
                    skipCoordinates = variableValue != "address";
                    break;
            }
        }
    }

    var descriptionNode = content.SelectSingleNode("//div[@class='AdviewContent']/div[@class='content']");

    using (var writer = ((JArray)ad["Contents"]).CreateWriter())
    {
        var existingPictures = ad.GetTokenValue("Contents>PictureUrl");
        foreach (var newPicture in pictures.Where(candidate => !existingPictures.Contains(candidate)))
        {
            WriteContentEntry(writer, "PictureUrl", newPicture);
        }

        // TODO: find a good solution to get the phone number.
        WriteContentEntry(writer, "AllowCommercial", commercialNode == null);
        WriteContentEntry(writer, "Name", nameNode != null ? nameNode.InnerText : string.Empty);
        WriteContentEntry(
            writer,
            "ContactUrl",
            emailNode != null
                ? PrefixProtocolRelativeUrl(emailNode.GetAttributeValue("href", string.Empty))
                : string.Empty);

        foreach (var parameter in parameters)
        {
            WriteContentEntry(writer, "Param", parameter);
        }

        WriteContentEntry(writer, "Description", descriptionNode != null ? descriptionNode.InnerHtml : string.Empty);

        if (!skipCoordinates)
        {
            WriteContentEntry(writer, "Latitude", latitude);
            WriteContentEntry(writer, "Longitude", longitude);
        }
    }

    return ad;
}

/// <summary>
/// Turns a protocol-relative URL (one starting with <c>//</c>) into an <c>https:</c> URL;
/// any other value is returned unchanged.
/// </summary>
/// <param name="url">The URL to normalise.</param>
/// <returns>The URL, prefixed with <c>https:</c> when it was protocol-relative.</returns>
private static string PrefixProtocolRelativeUrl(string url)
{
    return url.StartsWith("//", StringComparison.Ordinal) ? "https:" + url : url;
}

/// <summary>
/// Writes one <c>{ "Type": type, "Value": value }</c> object with a string value.
/// </summary>
/// <param name="writer">The writer to append to.</param>
/// <param name="type">The content type name.</param>
/// <param name="value">The string value; may be <c>null</c>.</param>
private static void WriteContentEntry(Newtonsoft.Json.JsonWriter writer, string type, string value)
{
    writer.WriteStartObject();
    writer.WritePropertyName("Type");
    writer.WriteValue(type);
    writer.WritePropertyName("Value");
    writer.WriteValue(value);
    writer.WriteEndObject();
}

/// <summary>
/// Writes one <c>{ "Type": type, "Value": value }</c> object with a boolean value.
/// </summary>
/// <param name="writer">The writer to append to.</param>
/// <param name="type">The content type name.</param>
/// <param name="value">The boolean value.</param>
private static void WriteContentEntry(Newtonsoft.Json.JsonWriter writer, string type, bool value)
{
    writer.WriteStartObject();
    writer.WritePropertyName("Type");
    writer.WriteValue(type);
    writer.WritePropertyName("Value");
    writer.WriteValue(value);
    writer.WriteEndObject();
}
/// <summary>
/// Saves an ad and its contents for the current search, unless an ad with the same URL
/// or the same content hash is already recorded for that search.
/// </summary>
/// <remarks>
/// The hash is the MD5 of the title concatenated with the first <c>Place</c>, <c>Price</c>
/// and <c>Description</c> content values. The ad row is created through the <c>AddAd</c>
/// stored procedure and each non-null content through <c>AddAdContent</c>. On success the
/// <c>Id</c> and <c>SearchId</c> properties are appended to <paramref name="ad"/>.
/// </remarks>
/// <param name="ad">
/// The ad to save. The method reads its <c>Url</c>, <c>Title</c>, <c>Date</c> and <c>Contents</c> values.
/// </param>
/// <returns>
/// <c>true</c> when the ad was saved; <c>false</c> when it was skipped as a duplicate.
/// </returns>
/// <exception cref="Exception">
/// The search this instance refers to does not exist in the database.
/// </exception>
public bool Store(JObject ad)
{
    decimal adId;

    using (var db = new ApplicationDbContext())
    {
        var url = (string)ad["Url"];

        // Duplicate by URL within the same search.
        if (db.Ads.Any(entry => entry.Search.ID == this.searchId && entry.Url == url))
        {
            return false;
        }

        var place = ad.GetTokenValue("Contents>Place").FirstOrDefault();
        var price = ad.GetTokenValue("Contents>Price").FirstOrDefault();
        var description = ad.GetTokenValue("Contents>Description").FirstOrDefault();
        var hash = (ad["Title"] + place + price + description).GetMd5Hash();

        // Duplicate by content hash within the same search.
        if (db.Ads.Any(entry => entry.Search.ID == this.searchId && entry.Hash == hash))
        {
            return false;
        }

        var search = db.Searches.FirstOrDefault(entry => entry.ID == this.searchId);
        if (search == null)
        {
            throw new Exception("Recherche inexistante...");
        }

        // The date string is parsed with French culture rules.
        var adDate = DateTime.Parse((string)ad["Date"], new CultureInfo("fr-FR"));

        adId = db.Database
            .SqlQuery<decimal>(
                "exec AddAd @search_id, @url, @date, @title, @hash",
                new SqlParameter("search_id", search.ID),
                new SqlParameter("url", url),
                new SqlParameter("date", adDate),
                new SqlParameter("title", (string)ad["Title"]),
                new SqlParameter("hash", hash))
            .First();
    }

    // Single-valued contents: only the first matching value of each kind is kept.
    var singleValued = new[]
    {
        new { Kind = AdContent.ContentType.AllowCommercial, Path = "Contents>AllowCommercial" },
        new { Kind = AdContent.ContentType.ContactUrl, Path = "Contents>ContactUrl" },
        new { Kind = AdContent.ContentType.Description, Path = "Contents>Description" },
        new { Kind = AdContent.ContentType.Latitude, Path = "Contents>Latitude" },
        new { Kind = AdContent.ContentType.Longitude, Path = "Contents>Longitude" },
        new { Kind = AdContent.ContentType.Name, Path = "Contents>Name" },
        new { Kind = AdContent.ContentType.Phone, Path = "Contents>Phone" },
        new { Kind = AdContent.ContentType.Place, Path = "Contents>Place" },
        new { Kind = AdContent.ContentType.Price, Path = "Contents>Price" }
    };

    var contents = singleValued
        .Select(source => new AdContent
                              {
                                  Type = source.Kind.ToString(),
                                  Value = ad.GetTokenValue(source.Path).FirstOrDefault()
                              })
        .ToList();

    // Multi-valued contents: every parameter, then every picture.
    contents.AddRange(
        ad.GetTokenValue("Contents>Param")
            .Select(param => new AdContent { Type = AdContent.ContentType.Param.ToString(), Value = param }));
    contents.AddRange(
        ad.GetTokenValue("Contents>PictureUrl")
            .Select(picture => new AdContent { Type = AdContent.ContentType.PictureUrl.ToString(), Value = picture }));

    using (var db = new ApplicationDbContext())
    {
        foreach (var adContent in contents)
        {
            // Contents without a value are not persisted.
            if (adContent.Value == null)
            {
                continue;
            }

            db.Database.ExecuteSqlCommand(
                "exec AddAdContent @ad_id, @type, @value",
                new SqlParameter("ad_id", (int)adId),
                new SqlParameter("type", adContent.Type),
                new SqlParameter("value", adContent.Value));
        }
    }

    // Expose the new identifiers on the ad object itself.
    using (var writer = ad.CreateWriter())
    {
        writer.WritePropertyName("Id");
        writer.WriteValue(adId);
        writer.WritePropertyName("SearchId");
        writer.WriteValue(this.searchId);
    }

    return true;
}