/// <summary>
/// Walks every <c>div</c> with class <c>post</c> in the current page and collects the
/// full-size image and thumbnail referenced by each post that has a file attached.
/// </summary>
/// <param name="replaceList">
/// Optional list that receives one <see cref="ReplaceInfo"/> per attribute (the two image
/// link <c>href</c>s and the thumbnail <c>src</c>) that refers to a collected file.
/// Pass <c>null</c> to skip collecting replacements.
/// </param>
/// <returns>
/// A <see cref="GetFilesResult"/> whose <c>Images</c> holds every accepted image and whose
/// <c>Thumbnails</c> holds the matching thumbnails (at most one spoiler thumbnail per call).
/// </returns>
public override GetFilesResult GetFiles(List<ReplaceInfo> replaceList) {
    GetFilesResult result = new GetFilesResult();
    bool seenSpoiler = false;

    // Records the span of an attribute so its value can be substituted later.
    // Attributes that are missing from the tag are silently ignored.
    void AddReplace(HtmlAttribute attribute, ReplaceType type, string tag) {
        if (attribute == null) {
            return;
        }
        replaceList.Add(new ReplaceInfo {
            Offset = attribute.Offset,
            Length = attribute.Length,
            Type = type,
            Tag = tag
        });
    }

    foreach (HtmlTagRange postTagRange in Parser.FindStartTags("div")
        .Where(t => HtmlParser.ClassAttributeValueHas(t, "post"))
        .Select(t => Parser.CreateTagRange(t))
        .Where(r => r != null))
    {
        // A post without both the file-text block and the thumbnail link has no
        // attachment we can use, so it is skipped.
        HtmlTagRange fileTextDivTagRange = Parser.CreateTagRange(
            Parser.FindStartTags(postTagRange, "div")
                .FirstOrDefault(t => HtmlParser.ClassAttributeValueHas(t, "fileText")));
        if (fileTextDivTagRange == null) {
            continue;
        }

        HtmlTagRange fileThumbLinkTagRange = Parser.CreateTagRange(
            Parser.FindStartTags(postTagRange, "a")
                .FirstOrDefault(t => HtmlParser.ClassAttributeValueHas(t, "fileThumb")));
        if (fileThumbLinkTagRange == null) {
            continue;
        }

        HtmlTag fileTextLinkStartTag = Parser.FindStartTag(fileTextDivTagRange, "a");
        if (fileTextLinkStartTag == null) {
            continue;
        }

        HtmlTag fileThumbImageTag = Parser.FindStartTag(fileThumbLinkTagRange, "img");
        if (fileThumbImageTag == null) {
            continue;
        }

        string imageUrl = fileTextLinkStartTag.GetAttributeValue("href");
        if (imageUrl == null) {
            continue;
        }

        string thumbUrl = fileThumbImageTag.GetAttributeValue("src");
        if (thumbUrl == null) {
            continue;
        }

        bool isSpoiler = HtmlParser.ClassAttributeValueHas(fileThumbLinkTagRange.StartTag, "imgspoiler");

        string originalFileName;
        if (isSpoiler) {
            // For spoiler posts the name is read from the file-text div's title attribute
            originalFileName = fileTextDivTagRange.StartTag.GetAttributeValue("title");
        }
        else {
            // If the filename is shortened, the original filename is in the title attribute
            originalFileName = fileTextLinkStartTag.GetAttributeValue("title");
            // Otherwise, the link's innerHTML contains the original filename
            if (originalFileName == null) {
                HtmlTagRange fileTextLinkTagRange = Parser.CreateTagRange(fileTextLinkStartTag);
                if (fileTextLinkTagRange == null) {
                    continue;
                }
                originalFileName = Parser.GetInnerHtml(fileTextLinkTagRange);
            }
        }
        if (originalFileName == null) {
            continue;
        }

        string imageMD5 = fileThumbImageTag.GetAttributeValue("data-md5");
        if (imageMD5 == null) {
            continue;
        }

        ImageInfo image = new ImageInfo {
            Url = General.GetAbsoluteUrl(Uri, HttpUtility.HtmlDecode(imageUrl)),
            Referer = Url,
            UnsanitizedOriginalFileName = HttpUtility.HtmlDecode(originalFileName),
            HashType = HashType.MD5,
            Hash = General.TryBase64Decode(imageMD5)
        };
        // The URL is tested first (null or empty) so that an unresolvable URL skips the
        // post instead of throwing; the thumbnail below is validated the same way.
        if (String.IsNullOrEmpty(image.Url) || String.IsNullOrEmpty(image.FileName) || image.Hash == null) {
            continue;
        }

        ThumbnailInfo thumb = new ThumbnailInfo {
            Url = General.GetAbsoluteUrl(Uri, HttpUtility.HtmlDecode(thumbUrl)),
            Referer = Url
        };
        if (String.IsNullOrEmpty(thumb.Url) || String.IsNullOrEmpty(thumb.FileName)) {
            continue;
        }

        if (replaceList != null) {
            // Both links to the full image (the text link and the thumbnail link) are
            // tagged with the image file name; the thumbnail <img> with the thumb name.
            AddReplace(fileTextLinkStartTag.GetAttribute("href"), ReplaceType.ImageLinkHref, image.FileName);
            AddReplace(fileThumbLinkTagRange.StartTag.GetAttribute("href"), ReplaceType.ImageLinkHref, image.FileName);
            AddReplace(fileThumbImageTag.GetAttribute("src"), ReplaceType.ImageSrc, thumb.FileName);
        }

        result.Images.Add(image);

        // Every non-spoiler thumbnail is kept, but only the first spoiler thumbnail
        // encountered is added (presumably because spoiler posts share one placeholder
        // thumbnail -- confirm against the site markup).
        if (!isSpoiler || !seenSpoiler) {
            result.Thumbnails.Add(thumb);
            if (isSpoiler) {
                seenSpoiler = true;
            }
        }
    }

    return result;
}
/// <summary>
/// Runs one check cycle: downloads each page in the page list, collects the images and
/// thumbnails found on them, downloads whatever has not been completed yet, optionally
/// rewrites the saved pages so they point at the local files, and finally schedules the
/// next check (unless the watcher is stopping).
/// </summary>
/// <remarks>
/// Any exception thrown during the cycle is caught and turned into
/// <c>Stop(StopReason.Other)</c>. <c>Settings.SaveThumbnails</c> is read once at the start
/// of the cycle; the same value decides both whether replace lists are allocated for
/// downloaded pages and whether the page-rewrite pass (which dereferences those lists)
/// runs, so a settings change in the middle of a cycle cannot leave the two out of step.
/// </remarks>
private void Check() {
    try {
        lock (_settingsSync) {
            _nextCheckWorkItem = null;
            _checkFinishedEvent.Reset();
            _isWaiting = false;

            // One-time setup performed on the first check
            if (!_hasInitialized) {
                _pageList = new List<PageInfo> {
                    new PageInfo(PageUrl)
                };
                _imageDiskFileNames = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
                _completedImages = new Dictionary<string, DownloadInfo>(StringComparer.OrdinalIgnoreCase);
                _completedThumbs = new Dictionary<string, DownloadInfo>(StringComparer.OrdinalIgnoreCase);
                CreateDirectory(_threadDownloadDirectory);
                _hasInitialized = true;
            }
        }

        string threadDir = ThreadDownloadDirectory;
        string imageDir = ImageDownloadDirectory;
        string thumbDir = ThumbnailDownloadDirectory;
        // Snapshot of the setting for the whole cycle; see the remarks on this method.
        bool saveThumbnails = Settings.SaveThumbnails;
        Queue<ImageInfo> pendingImages = new Queue<ImageInfo>();
        Queue<ThumbnailInfo> pendingThumbs = new Queue<ThumbnailInfo>();

        if (imageDir != _previousImageDir) {
            // If the image directory changed, recalculate the maximum filename length. This
            // affects the generated filenames, so rescan the files as well.
            _imageFileNameLengthLimit = null;
            _previousImageDir = imageDir;
            _imageDiskFileNames.Clear();
            _completedImages.Clear();
            _completedThumbs.Clear();
        }

        foreach (PageInfo pageInfo in _pageList) {
            // Reset the fresh flag on all of the pages before downloading starts so that
            // they're valid even if stopping before all the pages have been downloaded
            pageInfo.IsFresh = false;
        }

        int pageIndex = 0;
        bool anyPageSkipped = false;
        OnDownloadStatus(new DownloadStatusEventArgs(DownloadType.Page, 0, _pageList.Count));

        while (pageIndex < _pageList.Count && !IsStopping) {
            // The first page has no numeric suffix; later pages get _2, _3, ...
            string saveFileName = PageBaseFileName + (pageIndex == 0 ? "" : $"_{pageIndex + 1}") + ".html";
            HtmlParser pageParser = null;

            PageInfo pageInfo = _pageList[pageIndex];
            pageInfo.Path = Path.Combine(threadDir, saveFileName);

            ManualResetEvent downloadEndEvent = new ManualResetEvent(false);
            void DownloadEndCallback(DownloadResult result, string content, DateTime? lastModifiedTime, Encoding encoding) {
                if (result == DownloadResult.Completed) {
                    pageInfo.IsFresh = true;
                    pageParser = new HtmlParser(content);
                    pageInfo.CacheTime = lastModifiedTime;
                    pageInfo.Encoding = encoding;
                    pageInfo.ReplaceList = saveThumbnails ? new List<ReplaceInfo>() : null;
                }
                downloadEndEvent.Set();
            }
            // When retries are pending no cache time is passed to the page download
            DownloadPageAsync(pageInfo.Path, pageInfo.Url, PageAuth, _anyPendingRetries ? null : pageInfo.CacheTime, DownloadEndCallback);
            // Block until the callback has signalled, then release the event
            downloadEndEvent.WaitOne();
            downloadEndEvent.Close();

            if (pageParser == null) {
                // The download did not complete, so there is nothing to parse
                anyPageSkipped = true;
            }
            else {
                SiteHelper.SetParameters(this, pageParser);

                GetFilesResult getFilesResult = SiteHelper.GetFiles(pageInfo.ReplaceList);

                if (_completedImages.Count == 0) {
                    // Nothing is known to be completed yet: look on disk for files that
                    // already exist so they are not downloaded again.
                    if (getFilesResult.Images.Count != 0) {
                        CreateDirectory(imageDir);
                        _imageFileNameLengthLimit ??= GetFileNameLengthLimit(imageDir);
                    }
                    if (getFilesResult.Thumbnails.Count != 0) {
                        CreateDirectory(thumbDir);
                    }
                    foreach (ImageInfo image in getFilesResult.Images) {
                        string fileName = GetUniqueFileName(image.GetEffectiveFileName(UseOriginalFileNames, _imageFileNameLengthLimit.Value), _imageDiskFileNames, true);
                        string path = Path.Combine(imageDir, fileName);
                        if (File.Exists(path)) {
                            _imageDiskFileNames.Add(fileName);
                            _completedImages[image.FileName] = new DownloadInfo {
                                FileName = fileName,
                                Skipped = false
                            };
                        }
                    }
                    foreach (ThumbnailInfo thumb in getFilesResult.Thumbnails) {
                        string path = Path.Combine(thumbDir, thumb.FileName);
                        if (File.Exists(path)) {
                            _completedThumbs[thumb.FileName] = new DownloadInfo {
                                FileName = thumb.FileName,
                                Skipped = false
                            };
                        }
                    }
                }

                // Queue each not-yet-completed image once per page
                HashSet<string> pendingImageFileNames = new HashSet<string>(_completedImages.Comparer);
                foreach (ImageInfo image in getFilesResult.Images) {
                    if (_completedImages.ContainsKey(image.FileName)) {
                        continue;
                    }
                    if (!pendingImageFileNames.Add(image.FileName)) {
                        continue;
                    }
                    pendingImages.Enqueue(image);
                }

                // Queue each not-yet-completed thumbnail once per page
                HashSet<string> pendingThumbFileNames = new HashSet<string>(_completedThumbs.Comparer);
                foreach (ThumbnailInfo thumb in getFilesResult.Thumbnails) {
                    if (_completedThumbs.ContainsKey(thumb.FileName)) {
                        continue;
                    }
                    if (!pendingThumbFileNames.Add(thumb.FileName)) {
                        continue;
                    }
                    pendingThumbs.Enqueue(thumb);
                }

                // Keep the page list in step with the next-page link found on this page
                string nextPageUrl = SiteHelper.GetNextPageUrl();
                if (!String.IsNullOrEmpty(nextPageUrl)) {
                    PageInfo nextPageInfo = new PageInfo(nextPageUrl);
                    if (pageIndex == _pageList.Count - 1) {
                        _pageList.Add(nextPageInfo);
                    }
                    else if (_pageList[pageIndex + 1].Url != nextPageUrl) {
                        _pageList[pageIndex + 1] = nextPageInfo;
                    }
                }
                else if (pageIndex < _pageList.Count - 1) {
                    // No next page any more: drop every page after this one
                    _pageList.RemoveRange(pageIndex + 1, _pageList.Count - (pageIndex + 1));
                }
            }

            pageIndex++;
            OnDownloadStatus(new DownloadStatusEventArgs(DownloadType.Page, pageIndex, _pageList.Count));
        }

        if (!anyPageSkipped) {
            _anyPendingRetries = false;
        }

        MillisecondsUntilNextCheck = CheckIntervalSeconds * 1000;

        if (pendingImages.Count != 0 && !IsStopping) {
            LastImageOn = DateTime.UtcNow;
            OnFoundNewImage(EventArgs.Empty);

            List<ManualResetEvent> downloadEndEvents = new List<ManualResetEvent>();
            int completedImageCount = 0;
            foreach (KeyValuePair<string, DownloadInfo> item in _completedImages) {
                if (!item.Value.Skipped) {
                    completedImageCount++;
                }
            }
            int totalImageCount = completedImageCount + pendingImages.Count;
            OnDownloadStatus(new DownloadStatusEventArgs(DownloadType.Image, completedImageCount, totalImageCount));

            while (pendingImages.Count != 0 && !IsStopping) {
                ImageInfo image = pendingImages.Dequeue();
                string fileName = GetUniqueFileName(image.GetEffectiveFileName(UseOriginalFileNames, _imageFileNameLengthLimit.Value), _imageDiskFileNames);
                string path = Path.Combine(imageDir, fileName);
                HashType hashType = Settings.VerifyImageHashes ? image.HashType : HashType.None;

                ManualResetEvent downloadEndEvent = new ManualResetEvent(false);
                void DownloadEndCallback(DownloadResult result) {
                    if (result == DownloadResult.Completed || result == DownloadResult.Skipped) {
                        lock (_completedImages) {
                            _completedImages[image.FileName] = new DownloadInfo {
                                FileName = fileName,
                                Skipped = (result == DownloadResult.Skipped)
                            };
                            // A skipped file no longer counts towards the total
                            if (result == DownloadResult.Completed) {
                                completedImageCount++;
                            }
                            else if (result == DownloadResult.Skipped) {
                                totalImageCount--;
                            }
                            OnDownloadStatus(new DownloadStatusEventArgs(DownloadType.Image, completedImageCount, totalImageCount));
                        }
                    }
                    if (result == DownloadResult.Skipped || result == DownloadResult.RetryLater) {
                        // No file was kept under this name, so give the name back
                        lock (_imageDiskFileNames) {
                            _imageDiskFileNames.Remove(fileName);
                            if (result == DownloadResult.RetryLater) {
                                _anyPendingRetries = true;
                            }
                        }
                    }
                    downloadEndEvent.Set();
                }
                downloadEndEvents.Add(downloadEndEvent);
                DownloadFileAsync(path, image.Url, ImageAuth, image.Referer, hashType, image.Hash, DownloadEndCallback);
            }

            // Wait for every started image download to report back
            foreach (ManualResetEvent downloadEndEvent in downloadEndEvents) {
                downloadEndEvent.WaitOne();
                downloadEndEvent.Close();
            }
        }

        if (saveThumbnails) {
            if (pendingThumbs.Count != 0 && !IsStopping) {
                List<ManualResetEvent> downloadEndEvents = new List<ManualResetEvent>();
                int completedThumbCount = 0;
                foreach (KeyValuePair<string, DownloadInfo> item in _completedThumbs) {
                    if (!item.Value.Skipped) {
                        completedThumbCount++;
                    }
                }
                int totalThumbCount = completedThumbCount + pendingThumbs.Count;
                OnDownloadStatus(new DownloadStatusEventArgs(DownloadType.Thumbnail, completedThumbCount, totalThumbCount));

                while (pendingThumbs.Count != 0 && !IsStopping) {
                    ThumbnailInfo thumb = pendingThumbs.Dequeue();
                    string path = Path.Combine(thumbDir, thumb.FileName);

                    ManualResetEvent downloadEndEvent = new ManualResetEvent(false);
                    void DownloadEndCallback(DownloadResult result) {
                        if (result == DownloadResult.Completed || result == DownloadResult.Skipped) {
                            lock (_completedThumbs) {
                                _completedThumbs[thumb.FileName] = new DownloadInfo {
                                    FileName = thumb.FileName,
                                    Skipped = (result == DownloadResult.Skipped)
                                };
                                if (result == DownloadResult.Completed) {
                                    completedThumbCount++;
                                }
                                else if (result == DownloadResult.Skipped) {
                                    totalThumbCount--;
                                }
                                OnDownloadStatus(new DownloadStatusEventArgs(DownloadType.Thumbnail, completedThumbCount, totalThumbCount));
                            }
                        }
                        downloadEndEvent.Set();
                    }
                    downloadEndEvents.Add(downloadEndEvent);
                    DownloadFileAsync(path, thumb.Url, PageAuth, thumb.Referer, HashType.None, null, DownloadEndCallback);
                }

                // Wait for every started thumbnail download to report back
                foreach (ManualResetEvent downloadEndEvent in downloadEndEvents) {
                    downloadEndEvent.WaitOne();
                    downloadEndEvent.Close();
                }
            }

            if (!IsStopping || StopReason != StopReason.IOError) {
                // Rewrite every page downloaded in this cycle so that its image links and
                // thumbnail sources point at the downloaded files.
                foreach (PageInfo pageInfo in _pageList) {
                    if (!pageInfo.IsFresh) {
                        continue;
                    }
                    HtmlParser htmlParser = new HtmlParser(File.ReadAllText(pageInfo.Path, pageInfo.Encoding));
                    for (int i = 0; i < pageInfo.ReplaceList.Count; i++) {
                        ReplaceInfo replace = pageInfo.ReplaceList[i];
                        DownloadInfo downloadInfo = null;
                        // Path of the downloaded file relative to the thread directory,
                        // using forward slashes so it is usable inside the HTML.
                        string GetRelativeDownloadPath(string fileDownloadDir) =>
                            General.GetRelativeFilePath(Path.Combine(fileDownloadDir, downloadInfo.FileName), threadDir).Replace(Path.DirectorySeparatorChar, '/');
                        if (replace.Type == ReplaceType.ImageLinkHref && _completedImages.TryGetValue(replace.Tag, out downloadInfo)) {
                            replace.Value = "href=\"" + HttpUtility.HtmlAttributeEncode(GetRelativeDownloadPath(imageDir)) + "\"";
                        }
                        if (replace.Type == ReplaceType.ImageSrc && _completedThumbs.TryGetValue(replace.Tag, out downloadInfo)) {
                            replace.Value = "src=\"" + HttpUtility.HtmlAttributeEncode(GetRelativeDownloadPath(thumbDir)) + "\"";
                        }
                    }
                    General.AddOtherReplaces(htmlParser, pageInfo.Url, pageInfo.ReplaceList);
                    using (StreamWriter sw = new StreamWriter(pageInfo.Path, false, pageInfo.Encoding)) {
                        General.WriteReplacedString(htmlParser.PreprocessedHtml, pageInfo.ReplaceList, sw);
                    }
                    // Only remove the .bak file when the page that was read contains a
                    // closing html tag.
                    if (htmlParser.FindEndTag("html") != null && File.Exists(pageInfo.Path + ".bak")) {
                        // Best effort: failing to delete the backup is not an error
                        try { File.Delete(pageInfo.Path + ".bak"); }
                        catch { }
                    }
                }
            }
        }

        if (OneTimeDownload) {
            Stop(StopReason.DownloadComplete);
        }
    }
    catch {
        // Any failure during the cycle stops the watcher
        Stop(StopReason.Other);
    }

    if (IsThreadDownloadDirectoryPendingRename) {
        TryRenameThreadDownloadDirectory(true);
    }

    lock (_settingsSync) {
        _checkFinishedEvent.Set();
        if (!IsStopping) {
            _nextCheckWorkItem = _workScheduler.AddItem(NextCheckTicks, Check, PageHost);
            _isWaiting = MillisecondsUntilNextCheck > 0;
        }
    }

    if (IsStopping) {
        OnStopStatus(EventArgs.Empty);
    }
    else if (IsWaiting) {
        OnWaitStatus(EventArgs.Empty);
    }
}