// Merges one worker's part-file into the shared working file and updates the
// shared progress counters; all shared state is serialized by _lockTaskWriter.
private Tuple<int> AggregeteResults(Tuple<string, int, int> tuple)
{
    string partFile = tuple.Item1;
    int partCount = tuple.Item2;
    int taskIndex = tuple.Item3;

    lock (_lockTaskWriter)
    {
        // Append this bucket's results, then discard the temporary part file.
        string partContent = File.ReadAllText(partFile);
        File.AppendAllText(workingFile, partContent);
        File.Delete(partFile);
        //CosmosStream.UploadStream(serverFile, workingFile);

        ingestedCount += partCount;
        ImageLogger.LogMessage(this.log, EventType.Information, "[ImageProcess] - Image Process in progress [{0}/{1}]", ingestedCount, listTask.Count);

        // Throttled progress report: at most one WriteLog per reportInterval.
        TimeSpan sinceLastReport = DateTime.Now - this.lastTime;
        if (sinceLastReport > this.reportInterval)
        {
            string progress = string.Format("[ImageProcess]: In Progress ... [{0}/{1}]", ingestedCount, listTask.Count);
            this.WriteLog(EventType.Information, progress);
            lastTime = DateTime.Now;
        }
    }

    return new Tuple<int>(taskIndex);
}
// Runs the image pipeline (download -> scale -> ingest -> aggregate) in
// parallel over fixed-size buckets of the task list, then logs the outcome.
// Result is flipped to false by the workers once the failure buffer drains.
public void Start(int threadCount)
{
    this.processFailureBuffer = this.failureBuffer;
    ingestedCount = 0;
    this.lastTime = DateTime.Now;

    var buckets = ImageProcessHelper.SplitIntoBuckets(listTask, ingestBurstCount);
    buckets
        .AsParallel()
        .WithDegreeOfParallelism(threadCount)
        .Select(DownloadImage)
        .Select(ScaleImage)
        .Select(IngestImage)
        .Select(AggregeteResults)
        .ToList(); // ToList forces the lazy PLINQ pipeline to run to completion

    if (Result)
    {
        ImageLogger.LogMessage(this.log, EventType.Information, "[ImageProcess] - Image Process Done [{0}/{1}]", ingestedCount, listTask.Count);
    }
    else
    {
        ImageLogger.LogMessage(this.log, EventType.Warning, "Image process failure buffer exhausted!");
    }
}
/// <summary>
/// End-to-end driver: reads the image URL list, filters out images already present
/// in the mapping file, and processes the remainder on a multi-threaded pipeline.
/// </summary>
/// <param name="imageUrlListFilePath">File containing one image record per line.</param>
/// <param name="imageDownloadFolder">Local folder for raw downloads.</param>
/// <param name="imageScaleFolder">Local folder for scaled output.</param>
/// <param name="imageUrlMappingFile">Record of previously processed images (hash file names).</param>
/// <param name="serverFile">Remote target file handed to the processing thread.</param>
/// <param name="log">Trace log sink.</param>
/// <param name="WriteLog">Secondary progress-reporting callback.</param>
/// <returns>false when the list is too large or processing fails; true otherwise.</returns>
public static bool ProcessImage(string imageUrlListFilePath, string imageDownloadFolder, string imageScaleFolder, string imageUrlMappingFile, string serverFile, TraceLog log, Action<EventType, string> WriteLog)
{
    // Guard threshold: beyond this many new images the image table would fill up.
    const int MaxNewImages = 1000000;

    // Hash names of images already processed, so we only handle new ones.
    HashSet<string> folderFiles = new HashSet<string>();
    if (File.Exists(imageUrlMappingFile))
    {
        folderFiles = new HashSet<string>(File.ReadLines(imageUrlMappingFile).Select(l => GetHashFileName(l)));
    }

    // read Image URL list file to get process task
    ImageWorkingFile iwf = new ImageWorkingFile(imageUrlListFilePath, log);
    List<Image> listAll = iwf.Read();
    ImageLogger.LogMessage(log, EventType.Information, "[ImageProcess] - Total {0} images in list.", listAll.Count);

    // Assign local path names; presumably also filters out the folderFiles
    // entries, since listNew is reported as the "new" subset — confirm in helper.
    List<Image> listNew = AssignLocalPathName(listAll, imageDownloadFolder, imageScaleFolder, folderFiles);
    ImageLogger.LogMessage(log, EventType.Information, "[ImageProcess] - Total {0} new images need to process.", listNew.Count);
    WriteLog(EventType.Information, string.Format("[ImageProcess] - Total {0} new images need to process.", listNew.Count));

    int failureBuffer = Math.Max(listAll.Count * 3 / 100, 50); // 3% or 50 failure buffer
    int threadCount = 10; // ConfigStore.Instance.OsClientConfig.IngestThreadCount;
    int ingestBurstCount = 10; // ConfigStore.Instance.OsClientConfig.IngestBurstCount;
    string proxy = "bjsproxy";

    if (listNew.Count >= MaxNewImages)
    {
        // Message typo fixed: "casue" -> "cause".
        ImageLogger.LogMessage(log, EventType.Error, "[ImageProcess] - Too many images (1+ million) need to process, will cause image table full.");
        return false;
    }
    else if (listNew.Count > 0)
    {
        System.Net.ServicePointManager.DefaultConnectionLimit = 5120;
        ImageProcessThread thread = new ImageProcessThread(log, WriteLog);
        thread.AssignTaskList(listNew, imageUrlMappingFile, serverFile, proxy, failureBuffer, ingestBurstCount);
        thread.Start(threadCount);
        if (!thread.Result)
        {
            return false;
        }
    }
    else
    {
        ImageLogger.LogMessage(log, EventType.Information, "[ImageProcess] - No new images to process.");
    }

    return true;
}
// Scales every downloaded image in the bucket. Images that fail to scale are
// dropped; each exception drains the shared failure buffer by one, and when it
// goes negative Result flips to false so all workers stop early.
private Tuple<List<Image>, int> ScaleImage(Tuple<List<Image>, int> tuple)
{
    List<Image> pending = tuple.Item1;
    int bucketIndex = tuple.Item2;
    List<Image> scaled = new List<Image>();

    foreach (var image in pending)
    {
        try
        {
            // Another worker may already have exhausted the failure buffer.
            bool keepGoing;
            lock (_failBufferLock)
            {
                keepGoing = Result;
            }
            if (!keepGoing)
            {
                break;
            }

            if (!image.ScaleDefault())
            {
                continue; // scaler rejected this image; skip it quietly
            }
            //image.ExtractAccentColor(log);
            scaled.Add(image);

            lock (_failBufferLock)
            {
                // process succeed, reset failure buffer
                this.processFailureBuffer = this.failureBuffer;
            }
        }
        catch (Exception ex)
        {
            ImageLogger.LogMessage(this.log, EventType.Error, ex.Message);
            ImageLogger.LogMessage(this.log, EventType.Warning, "Exception when scale image {0}", image.SourceUrl);
            lock (_failBufferLock)
            {
                this.processFailureBuffer -= 1;
                if (this.processFailureBuffer < 0)
                {
                    Result = false;
                }
            }
        }
    }

    return new Tuple<List<Image>, int>(scaled, bucketIndex);
}
/// <summary>
/// Reads the working file and parses one Image per line, skipping malformed or
/// URL-less lines and de-duplicating by SourceUrl (first occurrence wins).
/// </summary>
/// <returns>The parsed, de-duplicated image list; empty when the file does not exist.</returns>
public List<Image> Read()
{
    List<Image> list = new List<Image>();
    if (!File.Exists(this.FileName))
    {
        return list;
    }

    HashSet<string> unique = new HashSet<string>();
    // true = detect encoding from byte order marks.
    using (StreamReader sr = new StreamReader(this.FileName, true))
    {
        while (!sr.EndOfStream)
        {
            string line = sr.ReadLine();
            Image image = null;
            try
            {
                image = Image.FromString(line.Trim());
            }
            catch (Exception ex)
            {
                ImageLogger.LogMessage(this.log, EventType.Error, ex.Message);
                ImageLogger.LogMessage(this.log, EventType.Warning, "Invalid image line: {0}", line.Trim());
                continue;
            }

            if (string.IsNullOrEmpty(image.SourceUrl))
            {
                ImageLogger.LogMessage(this.log, EventType.Warning, "Invalid image line: {0}", line.Trim());
                continue;
            }

            // HashSet<T>.Add returns false for duplicates, replacing the
            // original Contains-then-Add double lookup.
            if (unique.Add(image.SourceUrl)) // dedup
            {
                list.Add(image);
            }
        }
    }

    return list;
}
// Ingests each scaled image in the bucket through a DataLoader, appending
// successfully ingested records to a per-bucket temp file "<workingFile>.partN".
// Returns (tempFile, successCount, bucketIndex) for the aggregation step.
private Tuple<string, int, int> IngestImage(Tuple<List<Image>, int> tuple)
{
    List<Image> bucket = tuple.Item1;
    int bucketIndex = tuple.Item2;
    int succeeded = 0;
    string partFile = string.Format("{0}.part{1}", workingFile, bucketIndex);

    using (StreamWriter sw = new StreamWriter(partFile, true))
    {
        string feed = string.Empty;
        List<IDataLoadResult> results = null;
        try
        {
            using (DataLoader loader = new DataLoader(OSContext.Instance.Configuration))
            {
                foreach (Image myImage in bucket)
                {
                    byte[] data = ScaledImageToBinary(myImage);
                    string hashValue = HttpUrlHash.GetHashValueString(myImage.SourceUrl);
                    results = ImageIngester.Ingest(hashValue, data, myImage, loader);
                    succeeded += LogIngestResults(results, sw);
                }

                // Push any buffered loads through, then collect the tail results.
                loader.Flush();
                results = loader.Receive(true);
                succeeded += LogIngestResults(results, sw);
            }
        }
        catch (Exception ex)
        {
            ImageLogger.LogMessage(this.log, EventType.Error, "[ImageIngest][{0}][T{1}]: {2}", feed, bucketIndex, ex.Message);
            ImageLogger.LogMessage(this.log, EventType.Error, ex.StackTrace);
        }
    }

    return new Tuple<string, int, int>(partFile, succeeded, bucketIndex);
}
// Writes each successfully ingested image's record to the part file and counts
// it; failed ingests are logged with their failed locations instead.
// Returns the number of successful results in the batch.
private int LogIngestResults(List<IDataLoadResult> results, StreamWriter sw)
{
    int succeedCount = 0;
    foreach (IDataLoadResult result in results)
    {
        Image myImage = result.Context as Image;
        string key, feed, scenario;
        scenario = ExtractImageContext(myImage, out feed, out key); // key/feed are unused here

        if (!result.IsSuccessful)
        {
            ImageLogger.LogMessage(this.log, EventType.Warning, "[ImageIngest][{0}]: Ingest failed to {1} locations: {2}", scenario, result.FailedLocations.Count, string.Join(", ", result.FailedLocations));
            continue;
        }

        sw.WriteLine(myImage.ToString());
        ++succeedCount;
    }

    return succeedCount;
}
/// <summary>
/// Downloads a single image to its local DownloadImageName using the crawler
/// that matches the source URL (cosmos share path vs. plain HTTP/HTTPS).
/// </summary>
/// <param name="myImage">Image whose SourceUrl is fetched; DownloadImageName must be assigned.</param>
/// <returns>true on successful crawl; false for an unrecognized URI or crawl failure.</returns>
private bool DownloadImage(Image myImage)
{
    BaseCrawler crawler;
    string sourceUrl = myImage.SourceUrl.Trim();
    if (sourceUrl.StartsWith(@"https://cosmos", StringComparison.InvariantCultureIgnoreCase)) // cosmos path
    {
        crawler = new CosmosCrawler(FeedUriTypes.ShareFolder, sourceUrl, myImage.DownloadImageName, DateTime.MinValue);
    }
    else if (sourceUrl.StartsWith("http://", StringComparison.Ordinal) || sourceUrl.StartsWith("https://", StringComparison.Ordinal))
    {
        // Ordinal comparison: URL schemes are not linguistic text, and the
        // parameterless StartsWith overload is culture-sensitive (CA1310).
        ProxyType proxyType;
        if (!Enum.TryParse<ProxyType>(proxy, true, out proxyType))
        {
            proxyType = ProxyType.NULL; // unrecognized proxy name => no proxy
        }
        crawler = new HttpCrawler(FeedUriTypes.Http, sourceUrl, myImage.DownloadImageName, DateTime.MinValue, null, proxyType);
    }
    else // wrong image URI
    {
        ImageLogger.LogMessage(this.log, EventType.Warning, "Cannot identify this image's URI: {0}", sourceUrl);
        return false;
    }

    // crawler is assigned on every path reaching this point, so the original
    // "if (crawler != null)" guard was dead code and has been removed.
    if (crawler.Crawl() != BuildResults.Crawler_Succeed)
    {
        ImageLogger.LogMessage(this.log, EventType.Warning, "Exception when download image {0}", sourceUrl);
        return false;
    }

    return true;
}