/// <summary>
        /// Scans the XE search result pages and collects the property ids listed on them.
        /// </summary>
        /// <param name="pages">Number of result pages to request, starting at page 1.</param>
        /// <param name="client">HTTP client used to download each result page.</param>
        /// <param name="url">Search URL; <c>&amp;page=N</c> is appended to it for every request.</param>
        /// <returns>The ids that we have not already downloaded (those not found in ExportedProperties).</returns>
        public static async Task<List<string>> GetNewPropertyIds(int pages, HttpClient client, string url)
        {
            var allPropertyIds = new List<string>();

            for (int i = 1; i <= pages; i++)
            {
                Console.WriteLine($"Loading page {i} from {pages}");

                try
                {
                    var html = await client.GetStringAsync(new Uri($"{url}&page={i}"));

                    // Pause for a randomly chosen number of seconds after each request.
                    // The delay is awaited (instead of Thread.Sleep) so the calling thread
                    // is not blocked while this async method waits.
                    int delaySeconds = _random.Next(1, 14);
                    await Task.Delay(TimeSpan.FromSeconds(delaySeconds));

                    var htmlDocument = new HtmlDocument();
                    htmlDocument.LoadHtml(html);

                    // Every <div> whose class attribute contains "lazy" contributes its data-id
                    // value ("" when the attribute is missing).
                    var pageItems = htmlDocument.DocumentNode.Descendants("div")
                                    .Where(n => n.GetAttributeValue("class", "").Contains("lazy"))
                                    .Select(n => n.GetAttributeValue("data-id", ""))
                                    .ToList();

                    // The first page without any matching element ends the scan early.
                    if (pageItems.Count == 0)
                    {
                        Console.WriteLine($"\t no properties found on page {i}");
                        break;
                    }

                    allPropertyIds.AddRange(pageItems);
                }
                catch (Exception ex)
                {
                    // Log for the operator, then let the caller decide how to handle the failure.
                    Console.WriteLine($"Exception message: {ex.Message}, Inner Exception: {ex.InnerException}");
                    throw;
                }
            }

            // Keep only the ids that have not been exported yet.
            try
            {
                using (var context = new XECrawlerEntities())
                {
                    // A hash set makes each membership test O(1) instead of a linear scan
                    // over every exported id for every scraped id.
                    var exportedPropertyIds = new HashSet<string>(
                        context.ExportedProperties.Select(p => p.ExportedPropertyId));

                    return allPropertyIds.Where(id => !exportedPropertyIds.Contains(id)).ToList();
                }
            }
            catch (Exception ex)
            {
                Console.WriteLine($"Exception message: {ex.Message}, Inner Exception: {ex.InnerException}");
                throw;
            }
        }
Пример #2
0
 /// <summary>
 /// Adds one ExportedProperties row per supplied id and saves the changes.
 /// </summary>
 /// <param name="ids">Property ids to record as exported.</param>
 public static void SaveIds(List <string> ids)
 {
     try
     {
         using (var db = new XECrawlerEntities())
         {
             // Project every id into a new ExportedProperties entity.
             var rows = from propertyId in ids
                        select new ExportedProperties()
                        {
                            ExportedPropertyId = propertyId
                        };

             db.ExportedProperties.AddRange(rows);
             db.SaveChanges();
         }
     }
     catch (Exception error)
     {
         // Failures are only reported on the console; nothing is rethrown to the caller.
         Console.WriteLine($"Exception message: {error.Message}, Inner Exception: {error.InnerException}");
     }
 }