/// <summary>
/// Stores the given property ids as <c>ExportedProperties</c> rows.
/// </summary>
/// <param name="ids">Property ids to persist; one row is added per id.</param>
/// <remarks>
/// Any exception raised while saving is written to the console and not rethrown,
/// so the caller is not notified of a failure.
/// </remarks>
public static void SaveIds(List<string> ids)
{
    try
    {
        using (var db = new SCrawlerEntities())
        {
            // Lazily project each id to an entity; AddRange enumerates the sequence.
            IEnumerable<ExportedProperties> rows =
                from propertyId in ids
                select new ExportedProperties { ExportedPropertyId = propertyId };

            db.ExportedProperties.AddRange(rows);
            db.SaveChanges();
        }
    }
    catch (Exception ex)
    {
        Console.WriteLine($"Exception message: {ex.Message}, Inner Exception: {ex.InnerException}");
    }
}
/// <summary>
/// Scans the search result pages of spitogatos.gr and collects the property ids listed on them.
/// </summary>
/// <param name="pages">Maximum number of result pages to request.</param>
/// <param name="client">HTTP client used to download each page.</param>
/// <param name="url">Base search URL; <c>/offset_{page * 10}</c> is appended for every request.</param>
/// <returns>The scraped ids that are not already stored in <c>ExportedProperties</c>.</returns>
/// <remarks>
/// Scanning stops early at the first page that contains no listings or cannot be parsed.
/// Download and database errors are logged to the console and rethrown.
/// </remarks>
public static async Task<List<string>> GetNewPropertyIds(int pages, HttpClient client, string url)
{
    var allPropertyIds = new List<string>();

    for (int i = 1; i <= pages; i++)
    {
        Console.WriteLine("Loading page " + i + " from " + pages);
        try
        {
            // NOTE(review): the first request uses offset 10, not 0 - confirm that the
            // site's first result page is not being skipped.
            var html = await client.GetStringAsync(new Uri(url + $"/offset_{i * 10}"));

            // Pause 1-2 seconds between requests. Random.Next's upper bound is exclusive,
            // so (1, 3) is required to ever get 2. Task.Delay waits without blocking the thread.
            int delaySeconds = _random.Next(1, 3);
            await Task.Delay(delaySeconds * 1000);

            var htmlDocument = new HtmlDocument();
            htmlDocument.LoadHtml(html);

            List<string> pageItemIds;
            try
            {
                pageItemIds = ExtractPagePropertyIds(htmlDocument);
            }
            catch (Exception ex)
            {
                // Unexpected markup (missing anchor, malformed href, duplicate container).
                // Stop scanning as before, but report it instead of hiding it.
                Console.WriteLine($"Could not parse page {i}, stopping the scan: {ex.Message}");
                break;
            }

            if (pageItemIds == null)
            {
                // A page with no listings: no reason to continue with subsequent pages.
                break;
            }

            allPropertyIds.AddRange(pageItemIds);
        }
        catch (Exception ex)
        {
            Console.WriteLine($"Exception message: {ex.Message}, Inner Exception: {ex.InnerException}");
            throw;
        }
    }

    // Keep only the ids that have not been exported yet.
    try
    {
        HashSet<string> exportedPropertyIds;
        using (var context = new SCrawlerEntities())
        {
            // A set gives constant-time membership checks for the filter below.
            exportedPropertyIds = new HashSet<string>(
                context.ExportedProperties.Select(p => p.ExportedPropertyId));
        }

        return allPropertyIds.Where(id => !exportedPropertyIds.Contains(id)).ToList();
    }
    catch (Exception ex)
    {
        Console.WriteLine($"Exception message: {ex.Message}, Inner Exception: {ex.InnerException}");
        throw;
    }
}

/// <summary>
/// Extracts the property ids from one parsed search result page.
/// </summary>
/// <param name="htmlDocument">The parsed page.</param>
/// <returns>
/// The ids found on the page (possibly empty), or <c>null</c> when the page has no
/// listings container or the container has no <c>div/div</c> children.
/// </returns>
private static List<string> ExtractPagePropertyIds(HtmlDocument htmlDocument)
{
    var listingsContainer = htmlDocument.DocumentNode.Descendants("div")
        .SingleOrDefault(d => d.GetAttributeValue("id", "").Contains("searchDetailsListings"));
    if (listingsContainer == null)
    {
        return null;
    }

    // SelectNodes yields null (not an empty collection) when nothing matches.
    var candidateDivs = listingsContainer.SelectNodes("div/div");
    if (candidateDivs == null)
    {
        return null;
    }

    // Listings posted by real-estate agents are not filtered out here.
    var ids = new List<string>();
    foreach (var mediaDiv in candidateDivs.Where(n => n.GetAttributeValue("class", "").Contains("media")))
    {
        var href = mediaDiv.SelectSingleNode("a[1]").GetAttributeValue("href", "");

        // The id is the last '-'-separated segment of the href, minus its first character.
        var parts = href.Split('-');
        ids.Add(parts[parts.Length - 1].Substring(1));
    }

    return ids;
}