/// <summary>
/// Scans the XE search result pages and collects the property ids listed on them.
/// </summary>
/// <param name="pages">Highest page number to request; pages are requested from 1 upwards.</param>
/// <param name="client">HTTP client used to download each search page.</param>
/// <param name="url">Search URL; <c>&amp;page=N</c> is appended to it for every request.</param>
/// <returns>The ids that we have not already downloaded (i.e. not present in ExportedProperties).</returns>
public static async Task<List<string>> GetNewPropertyIds(int pages, HttpClient client, string url)
{
    var allPropertyIds = new List<string>();

    for (int i = 1; i <= pages; i++)
    {
        Console.WriteLine("Loading page " + i + " from " + pages);
        try
        {
            var html = await client.GetStringAsync(new Uri(url + "&page=" + i));

            // Random pause between requests. Awaiting Task.Delay keeps the pause
            // without blocking the calling thread the way Thread.Sleep did.
            int s = _random.Next(1, 14);
            await Task.Delay(TimeSpan.FromSeconds(s));

            var htmlDocument = new HtmlDocument();
            htmlDocument.LoadHtml(html);

            // Every <div> whose class attribute contains "lazy" contributes its data-id
            // (an empty string when the attribute is missing).
            var pageItems = htmlDocument.DocumentNode.Descendants("div")
                .Where(n => n.GetAttributeValue("class", "").Contains("lazy"))
                .Select(id => id.GetAttributeValue("data-id", ""))
                .ToList();

            // A page without matches ends the scan early.
            if (pageItems.Count == 0)
            {
                Console.WriteLine($"\t no properties found on page {i}");
                break;
            }

            allPropertyIds.AddRange(pageItems);
        }
        catch (Exception ex)
        {
            Console.WriteLine($"Exception message: {ex.Message}, Inner Exception: {ex.InnerException}");
            throw;
        }
    }

    // Select only non-exported property ids.
    try
    {
        using (var context = new XECrawlerEntities())
        {
            // A hash set gives constant-time membership checks instead of scanning
            // the full list of exported ids once per scraped id.
            var exportedPropertyIds = new HashSet<string>(
                context.ExportedProperties.Select(p => p.ExportedPropertyId));

            return allPropertyIds.Where(id => !exportedPropertyIds.Contains(id)).ToList();
        }
    }
    catch (Exception ex)
    {
        Console.WriteLine($"Exception message: {ex.Message}, Inner Exception: {ex.InnerException}");
        throw;
    }
}
/// <summary>
/// Stores the given ids as new <c>ExportedProperties</c> rows.
/// </summary>
/// <param name="ids">Property ids to record as exported.</param>
/// <remarks>
/// Any exception raised while saving is written to the console and not rethrown.
/// </remarks>
public static void SaveIds(List<string> ids)
{
    try
    {
        using (var db = new XECrawlerEntities())
        {
            // One entity per id; the projection is enumerated by AddRange.
            var rows =
                from propertyId in ids
                select new ExportedProperties() { ExportedPropertyId = propertyId };

            db.ExportedProperties.AddRange(rows);
            db.SaveChanges();
        }
    }
    catch (Exception error)
    {
        Console.WriteLine($"Exception message: {error.Message}, Inner Exception: {error.InnerException}");
    }
}