Beispiel #1
0
        /// <summary>
        /// Scans the parsed page for "post" divs that carry a file and collects the full-size
        /// image and the thumbnail of each one.
        /// </summary>
        /// <param name="replaceList">
        /// Optional list that receives one <see cref="ReplaceInfo"/> for every image link href
        /// and thumbnail src attribute found, tagged with the image or thumbnail file name.
        /// Pass null to skip collecting them.
        /// </param>
        /// <returns>
        /// The images and thumbnails found. Posts missing any required tag, attribute, URL,
        /// file name or hash are skipped entirely.
        /// </returns>
        public override GetFilesResult GetFiles(List <ReplaceInfo> replaceList)
        {
            GetFilesResult result      = new GetFilesResult();
            bool           seenSpoiler = false;

            // Adds a replacement entry for the attribute; a missing attribute is ignored.
            void AddReplace(HtmlAttribute attribute, ReplaceType type, string tag)
            {
                if (attribute == null)
                {
                    return;
                }
                replaceList.Add(
                    new ReplaceInfo {
                    Offset = attribute.Offset,
                    Length = attribute.Length,
                    Type   = type,
                    Tag    = tag
                });
            }

            foreach (HtmlTagRange postTagRange in Parser.FindStartTags("div").Where(t => HtmlParser.ClassAttributeValueHas(t, "post"))
                     .Select(t => Parser.CreateTagRange(t)).Where(r => r != null))
            {
                // The "fileText" div holds the link to the full image (and the file name)
                HtmlTagRange fileTextDivTagRange = Parser.CreateTagRange(Parser.FindStartTags(postTagRange, "div")
                                                                         .FirstOrDefault(t => HtmlParser.ClassAttributeValueHas(t, "fileText")));
                if (fileTextDivTagRange == null)
                {
                    continue;
                }

                // The "fileThumb" link wraps the thumbnail <img>
                HtmlTagRange fileThumbLinkTagRange = Parser.CreateTagRange(Parser.FindStartTags(postTagRange, "a")
                                                                           .FirstOrDefault(t => HtmlParser.ClassAttributeValueHas(t, "fileThumb")));
                if (fileThumbLinkTagRange == null)
                {
                    continue;
                }

                HtmlTag fileTextLinkStartTag = Parser.FindStartTag(fileTextDivTagRange, "a");
                if (fileTextLinkStartTag == null)
                {
                    continue;
                }

                HtmlTag fileThumbImageTag = Parser.FindStartTag(fileThumbLinkTagRange, "img");
                if (fileThumbImageTag == null)
                {
                    continue;
                }

                string imageUrl = fileTextLinkStartTag.GetAttributeValue("href");
                if (imageUrl == null)
                {
                    continue;
                }

                string thumbUrl = fileThumbImageTag.GetAttributeValue("src");
                if (thumbUrl == null)
                {
                    continue;
                }

                bool isSpoiler = HtmlParser.ClassAttributeValueHas(fileThumbLinkTagRange.StartTag, "imgspoiler");

                string originalFileName;
                if (isSpoiler)
                {
                    // For spoilered files the name is read from the fileText div's title attribute
                    originalFileName = fileTextDivTagRange.StartTag.GetAttributeValue("title");
                }
                else
                {
                    // If the filename is shortened, the original filename is in the title attribute
                    originalFileName = fileTextLinkStartTag.GetAttributeValue("title");
                    // Otherwise, the link's innerHTML contains the original filename
                    if (originalFileName == null)
                    {
                        HtmlTagRange fileTextLinkTagRange = Parser.CreateTagRange(fileTextLinkStartTag);
                        if (fileTextLinkTagRange == null)
                        {
                            continue;
                        }
                        originalFileName = Parser.GetInnerHtml(fileTextLinkTagRange);
                    }
                }
                if (originalFileName == null)
                {
                    continue;
                }

                string imageMD5 = fileThumbImageTag.GetAttributeValue("data-md5");
                if (imageMD5 == null)
                {
                    continue;
                }

                ImageInfo image = new ImageInfo {
                    Url     = General.GetAbsoluteUrl(Uri, HttpUtility.HtmlDecode(imageUrl)),
                    Referer = Url,
                    UnsanitizedOriginalFileName = HttpUtility.HtmlDecode(originalFileName),
                    HashType = HashType.MD5,
                    Hash     = General.TryBase64Decode(imageMD5)
                };
                // Treat a null URL like an empty one so that a malformed link skips this post
                // instead of throwing; the URL is checked first so FileName is never read
                // for an image without a URL.
                if (string.IsNullOrEmpty(image.Url) || image.FileName.Length == 0 || image.Hash == null)
                {
                    continue;
                }

                ThumbnailInfo thumb = new ThumbnailInfo {
                    Url     = General.GetAbsoluteUrl(Uri, HttpUtility.HtmlDecode(thumbUrl)),
                    Referer = Url
                };
                if (string.IsNullOrEmpty(thumb.Url) || thumb.FileName.Length == 0)
                {
                    continue;
                }

                if (replaceList != null)
                {
                    // Both links to the full image are tagged with the image's file name;
                    // the thumbnail's src is tagged with the thumbnail's file name.
                    AddReplace(fileTextLinkStartTag.GetAttribute("href"), ReplaceType.ImageLinkHref, image.FileName);
                    AddReplace(fileThumbLinkTagRange.StartTag.GetAttribute("href"), ReplaceType.ImageLinkHref, image.FileName);
                    AddReplace(fileThumbImageTag.GetAttribute("src"), ReplaceType.ImageSrc, thumb.FileName);
                }

                result.Images.Add(image);

                // Every non-spoiler thumbnail is kept, but only the first spoiler thumbnail is
                // (presumably because all spoilered posts share the same placeholder image).
                if (!isSpoiler || !seenSpoiler)
                {
                    result.Thumbnails.Add(thumb);
                    if (isSpoiler)
                    {
                        seenSpoiler = true;
                    }
                }
            }

            return result;
        }
Beispiel #2
0
        /// <summary>
        /// Performs one check of the watched thread: downloads every page in
        /// <c>_pageList</c>, queues and downloads the images and thumbnails that are not yet
        /// recorded as completed, rewrites the saved pages to point at the downloaded files
        /// (only when <c>Settings.SaveThumbnails</c> is set), and finally either schedules the
        /// next check or reports that the watcher is stopping.
        /// </summary>
        /// <remarks>
        /// Any exception thrown while checking is caught and converted into
        /// <c>Stop(StopReason.Other)</c>; the scheduling and status notification at the end
        /// of the method still run afterwards.
        /// </remarks>
        private void Check()
        {
            try {
                lock (_settingsSync) {
                    // This check is now running: clear the scheduled work item and mark the
                    // "check finished" event as not signaled until the end of the method.
                    _nextCheckWorkItem = null;
                    _checkFinishedEvent.Reset();
                    _isWaiting = false;

                    // First run only: create the page list (starting at PageUrl), the
                    // collections that track file names and completed downloads, and the
                    // thread download directory.
                    if (!_hasInitialized)
                    {
                        _pageList = new List <PageInfo> {
                            new PageInfo(PageUrl)
                        };
                        _imageDiskFileNames = new HashSet <string>(StringComparer.OrdinalIgnoreCase);
                        _completedImages    = new Dictionary <string, DownloadInfo>(StringComparer.OrdinalIgnoreCase);
                        _completedThumbs    = new Dictionary <string, DownloadInfo>(StringComparer.OrdinalIgnoreCase);

                        CreateDirectory(_threadDownloadDirectory);

                        _hasInitialized = true;
                    }
                }

                // Snapshot the directories once so the whole check uses the same paths
                string threadDir = ThreadDownloadDirectory;
                string imageDir  = ImageDownloadDirectory;
                string thumbDir  = ThumbnailDownloadDirectory;

                // Files found during this check that still have to be downloaded
                Queue <ImageInfo>     pendingImages = new Queue <ImageInfo>();
                Queue <ThumbnailInfo> pendingThumbs = new Queue <ThumbnailInfo>();

                if (imageDir != _previousImageDir)
                {
                    // If the image directory changed, recalculate the maximum filename length. This
                    // affects the generated filenames, so rescan the files as well.
                    _imageFileNameLengthLimit = null;
                    _previousImageDir         = imageDir;
                    _imageDiskFileNames.Clear();
                    _completedImages.Clear();
                    _completedThumbs.Clear();
                }

                foreach (PageInfo pageInfo in _pageList)
                {
                    // Reset the fresh flag on all of the pages before downloading starts so that
                    // they're valid even if stopping before all the pages have been downloaded
                    pageInfo.IsFresh = false;
                }

                // Download and parse the pages one at a time; _pageList may grow or shrink
                // inside the loop as next-page links are discovered.
                int  pageIndex      = 0;
                bool anyPageSkipped = false;
                OnDownloadStatus(new DownloadStatusEventArgs(DownloadType.Page, 0, _pageList.Count));
                while (pageIndex < _pageList.Count && !IsStopping)
                {
                    // The first page is saved without a suffix, later pages as "_2", "_3", ...
                    string     saveFileName = PageBaseFileName + (pageIndex == 0 ? "" : $"_{pageIndex + 1}") + ".html";
                    HtmlParser pageParser   = null;

                    PageInfo pageInfo = _pageList[pageIndex];
                    pageInfo.Path = Path.Combine(threadDir, saveFileName);

                    // DownloadPageAsync reports its result through the callback; this thread
                    // blocks on the event below until the callback has run.
                    ManualResetEvent downloadEndEvent = new ManualResetEvent(false);
                    void DownloadEndCallback(DownloadResult result, string content, DateTime?lastModifiedTime, Encoding encoding)
                    {
                        // Only a completed download yields a parser and marks the page as fresh;
                        // for any other result pageParser stays null.
                        if (result == DownloadResult.Completed)
                        {
                            pageInfo.IsFresh     = true;
                            pageParser           = new HtmlParser(content);
                            pageInfo.CacheTime   = lastModifiedTime;
                            pageInfo.Encoding    = encoding;
                            // The replace list is only needed when the pages will be rewritten later
                            pageInfo.ReplaceList = Settings.SaveThumbnails ? new List <ReplaceInfo>() : null;
                        }
                        downloadEndEvent.Set();
                    }
                    // While retries are pending, null is passed instead of the cached time
                    // (presumably to force a full download instead of a conditional request;
                    // confirm against DownloadPageAsync).
                    DownloadPageAsync(pageInfo.Path, pageInfo.Url, PageAuth, _anyPendingRetries ? null : pageInfo.CacheTime, DownloadEndCallback);
                    downloadEndEvent.WaitOne();
                    downloadEndEvent.Close();

                    if (pageParser == null)
                    {
                        anyPageSkipped = true;
                    }
                    else
                    {
                        SiteHelper.SetParameters(this, pageParser);

                        GetFilesResult getFilesResult = SiteHelper.GetFiles(pageInfo.ReplaceList);

                        // Nothing is recorded as completed yet (e.g. first check, or the image
                        // directory changed): treat files that already exist on disk as
                        // completed so they are not queued for download again.
                        if (_completedImages.Count == 0)
                        {
                            if (getFilesResult.Images.Count != 0)
                            {
                                CreateDirectory(imageDir);
                                _imageFileNameLengthLimit ??= GetFileNameLengthLimit(imageDir);
                            }
                            if (getFilesResult.Thumbnails.Count != 0)
                            {
                                CreateDirectory(thumbDir);
                            }
                            foreach (ImageInfo image in getFilesResult.Images)
                            {
                                string fileName = GetUniqueFileName(image.GetEffectiveFileName(UseOriginalFileNames, _imageFileNameLengthLimit.Value), _imageDiskFileNames, true);
                                string path     = Path.Combine(imageDir, fileName);
                                if (File.Exists(path))
                                {
                                    _imageDiskFileNames.Add(fileName);
                                    _completedImages[image.FileName] = new DownloadInfo {
                                        FileName = fileName,
                                        Skipped  = false
                                    };
                                }
                            }
                            foreach (ThumbnailInfo thumb in getFilesResult.Thumbnails)
                            {
                                string path = Path.Combine(thumbDir, thumb.FileName);
                                if (File.Exists(path))
                                {
                                    _completedThumbs[thumb.FileName] = new DownloadInfo {
                                        FileName = thumb.FileName,
                                        Skipped  = false
                                    };
                                }
                            }
                        }
                        // Queue every image that is neither completed nor already queued from
                        // this page (the set is created per page and uses the same comparer as
                        // the completed dictionary).
                        HashSet <string> pendingImageFileNames = new HashSet <string>(_completedImages.Comparer);
                        foreach (ImageInfo image in getFilesResult.Images)
                        {
                            if (_completedImages.ContainsKey(image.FileName))
                            {
                                continue;
                            }
                            if (!pendingImageFileNames.Add(image.FileName))
                            {
                                continue;
                            }
                            pendingImages.Enqueue(image);
                        }
                        // Same de-duplication for thumbnails
                        HashSet <string> pendingThumbFileNames = new HashSet <string>(_completedThumbs.Comparer);
                        foreach (ThumbnailInfo thumb in getFilesResult.Thumbnails)
                        {
                            if (_completedThumbs.ContainsKey(thumb.FileName))
                            {
                                continue;
                            }
                            if (!pendingThumbFileNames.Add(thumb.FileName))
                            {
                                continue;
                            }
                            pendingThumbs.Enqueue(thumb);
                        }

                        // Keep _pageList in sync with the next-page link: append a new last
                        // page, replace the following entry if its URL differs, or drop all
                        // following pages when this page has no next-page link.
                        string nextPageUrl = SiteHelper.GetNextPageUrl();
                        if (!String.IsNullOrEmpty(nextPageUrl))
                        {
                            PageInfo nextPageInfo = new PageInfo(nextPageUrl);
                            if (pageIndex == _pageList.Count - 1)
                            {
                                _pageList.Add(nextPageInfo);
                            }
                            else if (_pageList[pageIndex + 1].Url != nextPageUrl)
                            {
                                _pageList[pageIndex + 1] = nextPageInfo;
                            }
                        }
                        else if (pageIndex < _pageList.Count - 1)
                        {
                            _pageList.RemoveRange(pageIndex + 1, _pageList.Count - (pageIndex + 1));
                        }
                    }

                    pageIndex++;
                    OnDownloadStatus(new DownloadStatusEventArgs(DownloadType.Page, pageIndex, _pageList.Count));
                }
                // The retry flag is cleared only when no page was skipped in this check; an
                // image download below may set it again.
                if (!anyPageSkipped)
                {
                    _anyPendingRetries = false;
                }

                MillisecondsUntilNextCheck = CheckIntervalSeconds * 1000;

                // Image downloads: start all of them, then wait for every callback to signal
                if (pendingImages.Count != 0 && !IsStopping)
                {
                    LastImageOn = DateTime.UtcNow;
                    OnFoundNewImage(EventArgs.Empty);

                    List <ManualResetEvent> downloadEndEvents = new List <ManualResetEvent>();
                    // Progress counts only images that were actually downloaded, not skipped ones
                    int completedImageCount = 0;
                    foreach (KeyValuePair <string, DownloadInfo> item in _completedImages)
                    {
                        if (!item.Value.Skipped)
                        {
                            completedImageCount++;
                        }
                    }
                    int totalImageCount = completedImageCount + pendingImages.Count;
                    OnDownloadStatus(new DownloadStatusEventArgs(DownloadType.Image, completedImageCount, totalImageCount));
                    while (pendingImages.Count != 0 && !IsStopping)
                    {
                        ImageInfo image    = pendingImages.Dequeue();
                        string    fileName = GetUniqueFileName(image.GetEffectiveFileName(UseOriginalFileNames, _imageFileNameLengthLimit.Value), _imageDiskFileNames);
                        string    path     = Path.Combine(imageDir, fileName);

                        // Hash verification is passed on only when enabled in the settings
                        HashType         hashType         = Settings.VerifyImageHashes ? image.HashType : HashType.None;
                        ManualResetEvent downloadEndEvent = new ManualResetEvent(false);
                        void DownloadEndCallback(DownloadResult result)
                        {
                            // Completed and skipped images are both recorded so they are not
                            // queued again; a skipped one is removed from the total instead of
                            // being counted as done. The lock guards the dictionary and the
                            // shared counters (callbacks may presumably run concurrently).
                            if (result == DownloadResult.Completed || result == DownloadResult.Skipped)
                            {
                                lock (_completedImages) {
                                    _completedImages[image.FileName] = new DownloadInfo {
                                        FileName = fileName,
                                        Skipped  = (result == DownloadResult.Skipped)
                                    };
                                    if (result == DownloadResult.Completed)
                                    {
                                        completedImageCount++;
                                    }
                                    else if (result == DownloadResult.Skipped)
                                    {
                                        totalImageCount--;
                                    }
                                    OnDownloadStatus(new DownloadStatusEventArgs(DownloadType.Image, completedImageCount, totalImageCount));
                                }
                            }
                            // No file was kept under this name, so remove it from the set of
                            // names in use (GetUniqueFileName presumably added it; confirm).
                            // RetryLater additionally flags that a retry is pending.
                            if (result == DownloadResult.Skipped || result == DownloadResult.RetryLater)
                            {
                                lock (_imageDiskFileNames) {
                                    _imageDiskFileNames.Remove(fileName);
                                    if (result == DownloadResult.RetryLater)
                                    {
                                        _anyPendingRetries = true;
                                    }
                                }
                            }
                            downloadEndEvent.Set();
                        }
                        downloadEndEvents.Add(downloadEndEvent);
                        DownloadFileAsync(path, image.Url, ImageAuth, image.Referer, hashType, image.Hash, DownloadEndCallback);
                    }
                    // Wait for every download that was started, even if stopping was requested
                    foreach (ManualResetEvent downloadEndEvent in downloadEndEvents)
                    {
                        downloadEndEvent.WaitOne();
                        downloadEndEvent.Close();
                    }
                }

                if (Settings.SaveThumbnails)
                {
                    // Thumbnail downloads follow the same start-all-then-wait pattern as images
                    if (pendingThumbs.Count != 0 && !IsStopping)
                    {
                        List <ManualResetEvent> downloadEndEvents = new List <ManualResetEvent>();
                        int completedThumbCount = 0;
                        foreach (KeyValuePair <string, DownloadInfo> item in _completedThumbs)
                        {
                            if (!item.Value.Skipped)
                            {
                                completedThumbCount++;
                            }
                        }
                        int totalThumbCount = completedThumbCount + pendingThumbs.Count;
                        OnDownloadStatus(new DownloadStatusEventArgs(DownloadType.Thumbnail, completedThumbCount, totalThumbCount));
                        while (pendingThumbs.Count != 0 && !IsStopping)
                        {
                            ThumbnailInfo thumb = pendingThumbs.Dequeue();
                            string        path  = Path.Combine(thumbDir, thumb.FileName);

                            ManualResetEvent downloadEndEvent = new ManualResetEvent(false);
                            void DownloadEndCallback(DownloadResult result)
                            {
                                // Unlike the image callback, RetryLater is not tracked here
                                if (result == DownloadResult.Completed || result == DownloadResult.Skipped)
                                {
                                    lock (_completedThumbs) {
                                        _completedThumbs[thumb.FileName] = new DownloadInfo {
                                            FileName = thumb.FileName,
                                            Skipped  = (result == DownloadResult.Skipped)
                                        };
                                        if (result == DownloadResult.Completed)
                                        {
                                            completedThumbCount++;
                                        }
                                        else if (result == DownloadResult.Skipped)
                                        {
                                            totalThumbCount--;
                                        }
                                        OnDownloadStatus(new DownloadStatusEventArgs(DownloadType.Thumbnail, completedThumbCount, totalThumbCount));
                                    }
                                }
                                downloadEndEvent.Set();
                            }
                            downloadEndEvents.Add(downloadEndEvent);
                            // Thumbnails are requested with PageAuth and without hash verification
                            DownloadFileAsync(path, thumb.Url, PageAuth, thumb.Referer, HashType.None, null, DownloadEndCallback);
                        }
                        foreach (ManualResetEvent downloadEndEvent in downloadEndEvents)
                        {
                            downloadEndEvent.WaitOne();
                            downloadEndEvent.Close();
                        }
                    }

                    // Rewrite the saved pages unless the watcher is stopping because of an I/O error
                    if (!IsStopping || StopReason != StopReason.IOError)
                    {
                        foreach (PageInfo pageInfo in _pageList)
                        {
                            // Only pages downloaded during this check are rewritten
                            if (!pageInfo.IsFresh)
                            {
                                continue;
                            }
                            HtmlParser htmlParser = new HtmlParser(File.ReadAllText(pageInfo.Path, pageInfo.Encoding));
                            // NOTE(review): ReplaceList is null when Settings.SaveThumbnails was
                            // false at the time the page was downloaded (see the page download
                            // callback above); if the setting can change during a check this
                            // would dereference null - confirm.
                            for (int i = 0; i < pageInfo.ReplaceList.Count; i++)
                            {
                                ReplaceInfo  replace      = pageInfo.ReplaceList[i];
                                DownloadInfo downloadInfo = null;
                                // Path of the downloaded file relative to the thread directory,
                                // with forward slashes. Captures downloadInfo, which TryGetValue
                                // below assigns before the function is called.
                                string GetRelativeDownloadPath(string fileDownloadDir) =>
                                General.GetRelativeFilePath(Path.Combine(fileDownloadDir, downloadInfo.FileName), threadDir).Replace(Path.DirectorySeparatorChar, '/');

                                // Entries whose file is not recorded as completed get no new value here
                                if (replace.Type == ReplaceType.ImageLinkHref && _completedImages.TryGetValue(replace.Tag, out downloadInfo))
                                {
                                    replace.Value = "href=\"" + HttpUtility.HtmlAttributeEncode(GetRelativeDownloadPath(imageDir)) + "\"";
                                }
                                if (replace.Type == ReplaceType.ImageSrc && _completedThumbs.TryGetValue(replace.Tag, out downloadInfo))
                                {
                                    replace.Value = "src=\"" + HttpUtility.HtmlAttributeEncode(GetRelativeDownloadPath(thumbDir)) + "\"";
                                }
                            }
                            General.AddOtherReplaces(htmlParser, pageInfo.Url, pageInfo.ReplaceList);
                            // Overwrite the saved page with the replaced content, in its original encoding
                            using (StreamWriter sw = new StreamWriter(pageInfo.Path, false, pageInfo.Encoding)) {
                                General.WriteReplacedString(htmlParser.PreprocessedHtml, pageInfo.ReplaceList, sw);
                            }
                            // Delete the ".bak" file only when the page has a closing html tag
                            // (presumably a backup of the previous copy that is kept while the
                            // new page looks truncated; confirm). Deletion failures are ignored.
                            if (htmlParser.FindEndTag("html") != null && File.Exists(pageInfo.Path + ".bak"))
                            {
                                try { File.Delete(pageInfo.Path + ".bak"); }
                                catch { }
                            }
                        }
                    }
                }

                // A one-time download stops the watcher after its single check
                if (OneTimeDownload)
                {
                    Stop(StopReason.DownloadComplete);
                }
            }
            catch {
                // Any failure during the check stops the watcher with a generic reason
                Stop(StopReason.Other);
            }

            // Apply a pending rename of the thread directory now that the check is done
            if (IsThreadDownloadDirectoryPendingRename)
            {
                TryRenameThreadDownloadDirectory(true);
            }
            lock (_settingsSync) {
                // Signal that the check has finished and, unless stopping, schedule the next one
                _checkFinishedEvent.Set();
                if (!IsStopping)
                {
                    _nextCheckWorkItem = _workScheduler.AddItem(NextCheckTicks, Check, PageHost);
                    _isWaiting         = MillisecondsUntilNextCheck > 0;
                }
            }
            // Raise the status event that matches the resulting state
            if (IsStopping)
            {
                OnStopStatus(EventArgs.Empty);
            }
            else if (IsWaiting)
            {
                OnWaitStatus(EventArgs.Empty);
            }
        }