/// <summary>
/// Configures and starts a Chrome driver, resolves the folder the scraped
/// content is saved to, starts the requested consumer services and runs the
/// scraper. The driver is always shut down afterwards, even when scraping fails.
/// </summary>
/// <param name="scraperOptions">
/// Options read by this method: <c>Headless</c>, <c>FolderSavePath</c>,
/// <c>TargetAccount</c>, <c>OnlyScrapeStory</c>, <c>ScrapeStory</c> and
/// <c>ScrapeComments</c>. The instance is also handed to the scraper controller.
/// </param>
/// <remarks>
/// The method is <c>async void</c> (kept for compatibility with existing
/// callers), so callers cannot await it and cannot catch exceptions thrown
/// after the first <c>await</c>.
/// </remarks>
public static async void SetUp(ScraperOptions scraperOptions)
{
    var optionsChrome = new ChromeOptions();
    // NOTE(review): value 2 for this content setting presumably blocks image loading - confirm.
    optionsChrome.AddUserProfilePreference("profile.default_content_setting_values.images", 2);
    optionsChrome.AddArguments("--disable-popup-blocking", "--window-size=1920,1080", "--mute-audio");
    if (scraperOptions.Headless)
    {
        optionsChrome.AddArgument("headless");
    }

    // Resolve the path before the browser is launched so that a failure here
    // cannot leave a driver process behind.
    var savePath = ResolveSavePath(scraperOptions);

    _driver = new ChromeDriver("./bin/Debug/netcoreapp2.2", optionsChrome);
    try
    {
        if (!scraperOptions.OnlyScrapeStory)
        {
            var bufferMedia = WebDriverExtensions.StartMediaService(savePath);
            var bufferStory = scraperOptions.ScrapeStory ? WebDriverExtensions.StartStoryService(savePath) : null;
            var bufferText = scraperOptions.ScrapeComments ? WebDriverExtensions.StartTextService(savePath) : null;

            new ScraperController(_driver, scraperOptions, bufferMedia, bufferText, bufferStory).ExecuteScraper();

            await bufferMedia.Completion;
            if (bufferText != null)
            {
                await bufferText.Completion;
            }

            if (bufferStory != null)
            {
                await bufferStory.Completion;
            }
        }
        else
        {
            var bufferStory = WebDriverExtensions.StartStoryService(savePath);
            new ScraperController(_driver, scraperOptions, null, null, bufferStory).OnlyScrapeStory();
            await bufferStory.Completion;
        }
    }
    finally
    {
        // Always terminate the browser, otherwise a failed run leaks the driver process.
        _driver.Quit();
    }
}

/// <summary>
/// Builds the folder path (always ending in "/") that scraped files are saved to.
/// </summary>
/// <param name="scraperOptions">Supplies <c>FolderSavePath</c> and <c>TargetAccount</c>.</param>
/// <returns>
/// "&lt;home&gt;/Pictures/&lt;TargetAccount&gt;/" when no folder is configured; otherwise the
/// configured folder, with the target account appended as a sub-folder unless
/// the last path section already contains the account name (case-insensitive).
/// </returns>
private static string ResolveSavePath(ScraperOptions scraperOptions)
{
    if (string.IsNullOrEmpty(scraperOptions.FolderSavePath))
    {
        var isUnixLike = Environment.OSVersion.Platform == PlatformID.Unix
                         || Environment.OSVersion.Platform == PlatformID.MacOSX;
        var homePath = isUnixLike
            ? Environment.GetEnvironmentVariable("HOME")
            : Environment.ExpandEnvironmentVariables("%HOMEDRIVE%%HOMEPATH%");
        return homePath + "/Pictures/" + scraperOptions.TargetAccount + "/";
    }

    // Drop trailing separators first: otherwise the last section is empty, the
    // account check below fails and the account folder is appended a second time.
    var folderSavePath = scraperOptions.FolderSavePath.TrimEnd('/');
    var sections = folderSavePath.Split('/');
    var lastSection = sections[sections.Length - 1];

    if (lastSection.IndexOf(scraperOptions.TargetAccount, StringComparison.OrdinalIgnoreCase) >= 0)
    {
        return folderSavePath + "/";
    }

    return folderSavePath + "/" + scraperOptions.TargetAccount + "/";
}
/// <summary>
/// Receives (name, URI) story items from <paramref name="source"/>, downloads each
/// one into "&lt;path&gt;Stories/Temp/", then merges the downloaded files into
/// "&lt;path&gt;Stories/" and deletes the temp folder.
/// </summary>
/// <param name="path">Base folder; it is concatenated directly with "Stories/", so it is expected to end with a separator.</param>
/// <param name="source">Block delivering pairs of story name (key) and download URI (value).</param>
/// <remarks>
/// Items whose URI contains ".mp4" are saved with the ".mp4" extension, all others
/// with ".jpg". Each download is awaited before the next item is taken, so every
/// file is fully written before the merge step reads the temp folder.
/// A failed download is logged and skipped.
/// The method is <c>async void</c> (kept for compatibility with existing callers):
/// it cannot be awaited, and completion of <paramref name="source"/> does not
/// imply that the merge step below has finished.
/// </remarks>
public static async void ConsumeStoryAsync(string path, ISourceBlock<KeyValuePair<string, string>> source)
{
    var dirPath = path + "Stories/";
    var dirPathTemp = dirPath + "Temp/";

    // CreateDirectory creates missing parent folders and is a no-op when the folder exists.
    Directory.CreateDirectory(dirPathTemp);

    var storyItemsProcessed = 0;
    var storyItemsDownloaded = 0;

    using (var client = new WebClient())
    {
        while (await source.OutputAvailableAsync())
        {
            var (storyName, storyUri) = source.Receive();
            storyItemsProcessed++;
            Logger.Info("ConsumeStoryAsync|" + storyItemsProcessed + " Processing: " + storyName);

            var extension = storyUri.Contains(".mp4") ? ".mp4" : ".jpg";
            try
            {
                await client.DownloadFileTaskAsync(new Uri(storyUri), dirPathTemp + storyName + extension);
            }
            catch (WebException e)
            {
                // Best effort: one failed item must not abort the remaining downloads.
                Logger.Info("ConsumeStoryAsync|" + "Failed to download " + storyName + ": " + e.Message);
            }
        }
    }

    // NOTE(review): both lists are presumably (file name, file content) pairs in a
    // stable order - confirm against GetFilesFromDirectory.
    var existingStoryList = WebDriverExtensions.GetFilesFromDirectory(dirPath);
    var newStoryList = WebDriverExtensions.GetFilesFromDirectory(dirPathTemp);

    if (existingStoryList.Count == 0)
    {
        // Nothing stored yet: every downloaded file is new.
        var fileInfo = new DirectoryInfo(dirPathTemp).GetFiles("*");
        foreach (var file in fileInfo)
        {
            File.Move(dirPathTemp + file.Name, dirPath + file.Name);
        }

        storyItemsDownloaded += fileInfo.Length;
    }
    else
    {
        // Offset added to the new list's index to address the corresponding
        // entry in the existing list.
        var currentOffset = 0;

        void MergeNewStories()
        {
            for (var i = 0; i < newStoryList.Count; i++)
            {
                var existingIndex = i + currentOffset;

                // Ran past the end of the existing list: hand over from the current new item.
                if (existingIndex >= existingStoryList.Count)
                {
                    MoveExistingAndRemainingNewFiles(dirPathTemp, dirPath, existingStoryList, newStoryList, i);
                    return;
                }

                // Same content at the aligned position: nothing to do for this item.
                if (newStoryList[i].Value.SequenceEqual(existingStoryList[existingIndex].Value))
                {
                    continue;
                }

                // Search the later existing entries for the current new item.
                // The bound includes currentOffset so the index can never run
                // past the end of the existing list.
                var matched = false;
                for (var j = i + 1; j + currentOffset < existingStoryList.Count; j++)
                {
                    if (newStoryList[i].Value.SequenceEqual(existingStoryList[j + currentOffset].Value))
                    {
                        currentOffset += 1;
                        matched = true;
                        break;
                    }
                }

                if (!matched)
                {
                    // No existing entry matches (this also covers a mismatch on the last existing entry).
                    MoveExistingAndRemainingNewFiles(dirPathTemp, dirPath, existingStoryList, newStoryList, i);
                    return;
                }
            }
        }

        MergeNewStories();
        // NOTE(review): storyItemsDownloaded is not updated on this path, so the
        // summary below reports 0 here - confirm whether that is intended.
    }

    // Cleans up temp folder
    Directory.Delete(dirPathTemp, true);
    Logger.Info("ConsumeStoryAsync|" + "Processed {0} story items.", storyItemsProcessed);
    Logger.Info("ConsumeStoryAsync|" + "Downloaded {0} story items.", storyItemsDownloaded);
}