/// <summary>
/// Collects the stored image items referenced by the given features and writes them
/// into the zip stream as a series of JSON files named <c>images/imagesNNN.json</c>,
/// each holding at most 1000 items.
/// </summary>
/// <param name="features">Features whose image-url attributes should be looked up.</param>
/// <param name="zipStream">Open zip stream the JSON entries are appended to.</param>
/// <remarks>
/// This method is synchronous and blocks on the repository's asynchronous calls.
/// Urls that are not present in the repository are logged and skipped.
/// </remarks>
private void CreateImagesJsonFiles(List <Feature> features, ZipOutputStream zipStream)
        {
            const int imagesPerFile = 1000;

            var items          = new ConcurrentBag <ImageItem>();
            var downloadedUrls = _imagesRepository.GetAllUrls().Result.ToHashSet();

            _logger.LogInformation($"Staring Image file creation: {features.Count} features, exiting images: {downloadedUrls.Count}");
            Parallel.ForEach(features, new ParallelOptions {
                MaxDegreeOfParallelism = 10
            }, (feature) =>
            {
                // Attribute names are identifiers, so compare ordinally. A null attribute
                // value becomes a null string and is dropped by the whitespace filter
                // instead of throwing.
                var urls = feature.Attributes.GetNames()
                           .Where(n => n.StartsWith(FeatureAttributes.IMAGE_URL, StringComparison.Ordinal))
                           .Select(n => feature.Attributes[n]?.ToString())
                           .Where(u => !string.IsNullOrWhiteSpace(u));
                foreach (var url in urls)
                {
                    if (!downloadedUrls.Contains(url))
                    {
                        _logger.LogWarning("The following image does not exist in database: " + url + " feature: " + feature.GetId());
                        continue;
                    }
                    items.Add(_imagesRepository.GetImageByUrl(url).Result);
                }
            });

            var list               = items.ToList();
            var index              = 0;
            var copyBuffer         = new byte[4096];
            var serializerSettings = new JsonSerializerSettings
            {
                ContractResolver = new CamelCasePropertyNamesContractResolver()
            };

            // Walk the list by offset; re-materializing the remainder on every pass
            // would copy the tail of the list once per file.
            for (var offset = 0; offset < list.Count; offset += imagesPerFile)
            {
                var chunk            = list.GetRange(offset, Math.Min(imagesPerFile, list.Count - offset));
                var imageItemsString = JsonConvert.SerializeObject(chunk, serializerSettings);
                var newEntry         = new ZipEntry($"images/images{index:000}.json")
                {
                    DateTime = DateTime.Now
                };
                zipStream.PutNextEntry(newEntry);
                using (var contentStream = new MemoryStream(Encoding.UTF8.GetBytes(imageItemsString)))
                {
                    StreamUtils.Copy(contentStream, zipStream, copyBuffer);
                }
                zipStream.CloseEntry();
                index++;
            }
            _logger.LogInformation("Finished Image file creation: " + list.Count);
        }
示例#2
0
        /// <inheritdoc/>
        /// <remarks>
        /// Removes stored images whose url is no longer in <paramref name="imagesUrls"/>, then
        /// processes the given urls with up to 20 parallel workers. A url that is already stored
        /// and whose remote file size is positive is skipped. Otherwise the content is fetched
        /// (up to three attempts) and stored; if no content could be fetched the stored image
        /// for that url is deleted. Failures for a single url are logged and do not stop the run.
        /// </remarks>
        public async Task DownloadAndStoreUrls(List <string> imagesUrls)
        {
            const int maxFetchAttempts = 3;

            var exitingUrls = await _imagesRepository.GetAllUrls();
            // Hash set so the membership test inside the parallel loop is O(1) per url.
            var exitingUrlsSet = new HashSet<string>(exitingUrls);

            var needToRemove = exitingUrls.Except(imagesUrls).ToList();

            _logger.LogInformation($"Need to remove {needToRemove.Count} images that are no longer relevant");
            foreach (var imageUrlToRemove in needToRemove)
            {
                await _imagesRepository.DeleteImageByUrl(imageUrlToRemove);
            }
            _logger.LogInformation($"Finished removing images, starting downloading and index: {imagesUrls.Count}");

            var counter = 0;
            Parallel.ForEach(
                imagesUrls,
                new ParallelOptions {
                    MaxDegreeOfParallelism = 20
                },
                // Hash algorithm instances are not thread-safe, so every worker gets its own
                // MD5 instance instead of sharing one across all parallel iterations.
                () => MD5.Create(),
                (imageUrl, loopState, md5) =>
                {
                    try
                    {
                        // Use the value returned by the atomic increment; re-reading the shared
                        // counter could observe another worker's update and log twice or never.
                        var processed = Interlocked.Increment(ref counter);
                        if (processed % 100 == 0)
                        {
                            _logger.LogInformation($"Indexed {processed} images of {imagesUrls.Count}");
                        }
                        if (exitingUrlsSet.Contains(imageUrl))
                        {
                            var size = _remoteFileFetcherGateway.GetFileSize(imageUrl).Result;
                            if (size > 0)
                            {
                                return md5;
                            }
                        }
                        var content = new byte[0];
                        for (int retryIndex = 0; retryIndex < maxFetchAttempts; retryIndex++)
                        {
                            try
                            {
                                content = _remoteFileFetcherGateway.GetFileContent(imageUrl).Result.Content;
                                break;
                            }
                            catch
                            {
                                // Best effort: back off briefly before the next attempt, but do
                                // not wait after the last one since nothing follows it.
                                if (retryIndex < maxFetchAttempts - 1)
                                {
                                    Task.Delay(200).Wait();
                                }
                            }
                        }
                        if (content.Length == 0)
                        {
                            _imagesRepository.DeleteImageByUrl(imageUrl).Wait();
                            return md5;
                        }
                        StoreImage(md5, content, imageUrl).Wait();
                    }
                    catch (Exception ex)
                    {
                        _logger.LogWarning(ex, "There was a problem with the following image url: " + imageUrl + " ");
                    }
                    return md5;
                },
                // Dispose each worker's MD5 instance once that worker is done.
                md5 => md5.Dispose());
        }