Beispiel #1
0
 /// <summary>
 /// Ensures the <c>eventListener</c> field holds an <c>HttpEventListener</c>.
 /// The instance is created on the first call; later calls leave the existing one in place.
 /// </summary>
 public static void Init()
 {
     // A listener already exists: nothing to do.
     if (eventListener != null)
     {
         return;
     }

     eventListener = new HttpEventListener();
 }
Beispiel #2
0
        /// <summary>
        /// Program entry point. Parses the command-line options, loads the visited / black-list /
        /// start-page files, re-queues videos that still have partial <c>.ts</c> files in the temp
        /// directory, and then alternates between downloading queued video pages (HLS playlists,
        /// fetched segment by segment) and crawling listing pages for more links until it runs out
        /// of work, too many consecutive pages lack a video, or Ctrl+C is pressed.
        /// </summary>
        /// <param name="args">Command-line arguments, parsed into <c>Options</c>.</param>
        /// <returns>A task that completes when the crawl loop ends.</returns>
        static async Task Main(string[] args)
        {
            var running = true;

            // Ctrl+C asks for a graceful stop: the process is kept alive (Cancel = true) and the
            // loops below observe the cleared flag.
            Console.CancelKeyPress += (sender, e) =>
            {
                e.Cancel = true;
                running  = false;
            };
            HttpEventListener.Init();
            Parser.Default.ParseArguments <Options>(args).WithParsed(option =>
            {
                _startPath        = option.StartPagePath;
                _startPathHistory = _startPath + ".history";
                _visitePath       = option.VisitedUrlPath;
                // _time is later passed to CancelAfter, which takes milliseconds.
                _time             = option.DownloadTime * 1000;
                _startItemPath    = option.StartItemPath;
                _tempPath         = option.TempPath;
                _storePath        = option.VideoStorePath;
                _parallelSize     = option.ParallelSize;
                _blackPath        = option.BlackUrlPath;
                // Crawl depth is capped at 9.
                _maxDepth         = (byte)Math.Min(9, option.Depth);
                deep = option.Deep;
            }).WithNotParsed(err =>
            {
                // Invalid arguments: do not start crawling.
                running = false;
            });
            if (!running)
            {
                return;
            }
            // NOTE(review): not referenced again in this method; kept because the constructor
            // may have side effects - confirm before removing.
            var speedCounter = new Speed();
            // Ids of videos that were already handled in earlier runs.
            var visitedUrls  = File.Exists(_visitePath)
                ? File.ReadLines(_visitePath).Where(Utils.IsNotNullOrWhiteSpace).Select(Convert).ToHashSet()
                : new HashSet <uint>();
            // Black-listed tags plus listing pages already crawled in this run.
            var filter = new HashSet <string>();
            // Start pages (channels, tags, searches) still to crawl.
            var starts = new Queue <string>();
            // Listing pages discovered for the current start page.
            var pages  = new Stack <string>();

            if (File.Exists(_blackPath))
            {
                foreach (var url in File.ReadLines(_blackPath).Where(Utils.IsNotNullOrWhiteSpace))
                {
                    filter.Add(url);
                }
            }
            if (File.Exists(_startPath))
            {
                // Linked list + dictionary: de-duplicates start pages while keeping their order and
                // allows entries listed in the history file to be moved to the end.
                var links = new LinkedList <string>();
                var dict  = new Dictionary <string, LinkedListNode <string> >();
                foreach (var url in File.ReadLines(_startPath).Where(Utils.IsNotNullOrWhiteSpace))
                {
                    var tag = url.Substring(url.LastIndexOf('/') + 1);
                    if (filter.Contains(tag) || dict.ContainsKey(url))
                    {
                        continue;
                    }
                    dict[url] = links.AddLast(url);
                }
                if (File.Exists(_startPathHistory))
                {
                    foreach (var url in File.ReadLines(_startPathHistory).Where(Utils.IsNotNullOrWhiteSpace))
                    {
                        if (dict.TryGetValue(url, out var node))
                        {
                            links.Remove(node);
                        }
                        dict[url] = links.AddLast(url);
                    }
                }
                foreach (var url in links)
                {
                    starts.Enqueue(url);
                }
                // Keep a backup, persist the re-ordered list and reset the history file.
                File.Copy(_startPath, _startPath + ".bak", true);
                File.WriteAllLines(_startPath, links);
                File.WriteAllText(_startPathHistory, string.Empty);
                dict.Clear();
                links.Clear();
            }
            // Video pages waiting to be downloaded.
            var urls = new Queue <Link>();

            // Handle videos that were not completely downloaded in an earlier run.
            if (!Directory.Exists(_tempPath))
            {
                Directory.CreateDirectory(_tempPath);
            }
            if (!Directory.Exists(_storePath))
            {
                Directory.CreateDirectory(_storePath);
            }
            var tempFiles = Directory.GetFiles(_tempPath, "*.ts");
            // Temp files are named "<videoId>-<partIndex>.ts"; group them by video id and resume
            // the videos with the most parts on disk first.
            var tempIds   = tempFiles.Select(f =>
            {
                var idMatch = Regex.Match(f, @"(\d+)-(\d+)\.ts");
                return(idMatch.Success ? idMatch.Groups[1].Value : string.Empty);
            }).GroupBy(f => f).Where(g => !string.IsNullOrEmpty(g.Key)).OrderByDescending(g => g.Count());

            foreach (var tempId in tempIds)
            {
                urls.Enqueue(new Link()
                {
                    Url = $"https://www.xvideos.com/video{tempId.Key}/_"
                });
            }
            // Video links
            if (deep)
            {
                LoadVideoItems(visitedUrls, urls);
            }


            var socketHandler = new SocketsHttpHandler()
            {
                ConnectTimeout              = TimeSpan.FromSeconds(15),
                AutomaticDecompression      = DecompressionMethods.GZip | DecompressionMethods.Deflate,
                PooledConnectionIdleTimeout = TimeSpan.FromMinutes(5)
            };

            var client = new HttpClient(new HttpResponseTimeoutHandler(socketHandler)
            {
                ResponseTimeout = TimeSpan.FromSeconds(15)
            }, true)
            {
                Timeout = TimeSpan.FromMinutes(10),
                DefaultRequestHeaders =
                {
                    {
                        "User-Agent",
                        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36"
                    },
                    {
                        "Accept",
                        "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9"
                    },
                    // Only advertise encodings the handler is configured to decode (GZip/Deflate).
                    // Offering "br" could yield Brotli bodies that would reach the regex parsing
                    // below still compressed.
                    { "Accept-Encoding", "gzip, deflate" },
                    { "Connection",      "keep-alive"    }
                }
            };
            // One set of reusable in-memory buffers per download task; a task splits a large
            // segment into ranges and buffers each range in one of its streams.
            var parallelStreamGroup = new Stream[_parallelSize][];

            for (int i = 0; i < parallelStreamGroup.Length; i++)
            {
                var parallelStreams = new Stream[_parallelSize];
                for (int j = 0; j < parallelStreams.Length; j++)
                {
                    parallelStreams[j] = new MemoryStream();
                }
                parallelStreamGroup[i] = parallelStreams;
            }
            // Indexes into 'parts' that still have to be downloaded for the current video.
            var    downPartQueue = new ConcurrentQueue <int>();
            // Segment names of the current video; each entry is replaced by the local temp file
            // path once that segment is complete.
            var    parts         = new List <string>();
            // Consecutive video pages on which no HLS playlist URL was found.
            var    noVideoSize   = 0;
            // Video pages fetched since the last listing page (-1 until the first iteration ends).
            var    downSize      = -1;
            string currentStart  = string.Empty;
            // True once at least one queued item was taken for the current start page.
            var    hasDown       = false;

            while (running)
            {
                // Phase 1: drain the queue of video pages.
                while (running && urls.TryDequeue(out var item))
                {
                    hasDown = true;
                    if (item.Depth >= _maxDepth || item.Url.Contains("THUMBNUM"))
                    {
                        continue;
                    }
                    try
                    {
                        var url     = HttpUtility.HtmlDecode(item.Url);
                        var videoId = Convert(url);
                        if (visitedUrls.Contains(videoId))
                        {
                            continue;
                        }
                        if (!url.StartsWith("https://") && !url.StartsWith("http://"))
                        {
                            url = baseHost + url;
                        }
                        Console.WriteLine("start down {0}", url);
                        downSize++;
                        var res = await client.GetStringOrNullAsync(url);

                        if (string.IsNullOrEmpty(res.Item2))
                        {
                            // No body. NOTE(review): Item1 appears to flag a definitive failure
                            // (the page is then marked visited) - confirm against the helper.
                            if (res.Item1)
                            {
                                visitedUrls.Add(videoId);
                                File.AppendAllText(_visitePath, url + Environment.NewLine);
                            }
                            await Task.Delay(1000);

                            continue;
                        }
                        var html      = res.Item2;
                        // Skip (and mark visited) videos that reference any black-listed tag/channel/profile.
                        var tagFilter = Regex.Matches(html, @"/(tags|channels|model-channels|channels|amateur-channels|profiles)/([^""]+)", RegexOptions.Compiled).Any(m => m.Success && filter.Contains(m.Groups[2].Value));
                        if (tagFilter)
                        {
                            visitedUrls.Add(videoId);
                            File.AppendAllText(_visitePath, url + Environment.NewLine);
                            continue;
                        }

                        // Model/channel links found on the page become new start pages. The query is
                        // lazy: the Enqueue side effect runs while AppendAllLines enumerates it.
                        var models = Regex.Matches(html, @"""(/models/.+?)""", RegexOptions.Compiled)
                                     .Union(Regex.Matches(html, @"""(\\/(model-channels|channels|amateur-channels)\\/.+?)""", RegexOptions.Compiled)).Where(m => m.Success).Select(m =>
                        {
                            var tag = Regex.Unescape(m.Groups[1].Value);
                            if (filter.Contains(tag.Substring(tag.LastIndexOf('/') + 1)))
                            {
                                return(string.Empty);
                            }
                            var link = baseHost + tag;
                            starts.Enqueue(link);
                            return(link);
                        }).Distinct();
                        File.AppendAllLines(_startPath, models);
                        // Related videos are recorded as "<depth>|<url>" and, in deep mode, queued
                        // one level deeper. Also lazy: evaluated by AppendAllLinesAsync.
                        var relates = Regex.Matches(html, @"""(\\/video\d+?\\/.+?)""", RegexOptions.Compiled).Where(m => m.Success).Select(r =>
                        {
                            var l    = Regex.Unescape(r.Groups[1].Value);
                            var link = new Link()
                            {
                                Url = l, Depth = (byte)(item.Depth + 1)
                            };
                            if (deep)
                            {
                                urls.Enqueue(link);
                            }
                            return($"{link.Depth}|{link.Url}");
                        });
                        await File.AppendAllLinesAsync(_startItemPath, relates);

                        var match = Regex.Match(html, @"'(https://.+\.xvideos-cdn\.com/.+/hls\.m3u8.*)'");
                        if (!match.Success)
                        {
                            // Stop the whole run after 60 consecutive pages without a playlist.
                            noVideoSize++;
                            if (noVideoSize >= 60)
                            {
                                running = false;
                            }
                            continue;
                        }
                        noVideoSize = 0;
                        var filePath = Guid.NewGuid().ToString();
                        var title    = Regex.Match(html, "<title>(.+) - XVIDEOS.COM</title>");
                        if (title.Success)
                        {
                            // The title becomes part of a file name, so replace characters that are
                            // illegal there (e.g. '/'). Otherwise File.Create throws and the item is
                            // re-queued and fails again on every retry.
                            filePath = string.Join("_", title.Groups[1].Value.Trim().Split(Path.GetInvalidFileNameChars()));
                        }

                        var hls = match.Groups[1].Value;
                        Console.WriteLine(hls);
                        html = (await client.GetStringOrNullAsync(hls)).Item2;
                        if (string.IsNullOrEmpty(html))
                        {
                            continue;
                        }

                        // Master playlist: map vertical resolution ("720p") -> variant playlist path.
                        var hlsDic = new Dictionary <int, string>();
                        var reader = new StringReader(html);
                        while (reader.Peek() > -1)
                        {
                            var line = reader.ReadLine();
                            if (line.StartsWith("#EXT-X-STREAM-INF"))
                            {
                                var np          = Regex.Match(line, @"(\d+)p");
                                var variantPath = reader.ReadLine();
                                // Ignore malformed entries instead of throwing; duplicates overwrite.
                                if (np.Success && int.TryParse(np.Groups[1].Value, out var height) && !string.IsNullOrEmpty(variantPath))
                                {
                                    hlsDic[height] = variantPath;
                                }
                            }
                        }
                        if (hlsDic.Count == 0)
                        {
                            // No usable variant: skip rather than throw and retry forever.
                            continue;
                        }

                        var baseUrl    = hls.Substring(0, hls.LastIndexOf('/') + 1);
                        // Prefer the best quality below 1080p; if only 1080p+ variants exist fall back
                        // to the smallest one instead of failing on an empty sequence.
                        var candidates = hlsDic.Where(kv => kv.Key < 1080).OrderByDescending(kv => kv.Key).ToList();
                        var downHls    = baseUrl + (candidates.Count > 0
                                             ? candidates[0].Value
                                             : hlsDic.OrderBy(kv => kv.Key).First().Value);
                        Console.WriteLine(downHls);
                        html = (await client.GetStringOrNullAsync(downHls)).Item2;
                        if (string.IsNullOrEmpty(html))
                        {
                            continue;
                        }

                        reader = new StringReader(html);
                        parts.Clear();
                        // Drop indexes left over from a previous video that ended with an exception;
                        // they would otherwise be applied to this video's segment list.
                        downPartQueue.Clear();
                        while (reader.Peek() > -1)
                        {
                            var line = reader.ReadLine();
                            if (line.StartsWith("#EXTINF:"))
                            {
                                // The line after #EXTINF is the segment name.
                                downPartQueue.Enqueue(parts.Count);
                                parts.Add(reader.ReadLine());
                            }
                        }

                        if (parts.Count <= 0)
                        {
                            continue;
                        }
                        var success     = true;
                        var successSize = 0;
                        using (var commonCts = new CancellationTokenSource())
                        {
                            // One worker per stream group; each worker pulls segment indexes from the
                            // shared queue until it is empty or the video has been given up.
                            var downTasks = Enumerable.Range(0, parallelStreamGroup.Length).Select(async index =>
                            {
                                while (downPartQueue.TryDequeue(out var local) && success)
                                {
                                    var part    = parts[local];
                                    var partUrl = baseUrl + part;
                                    Console.WriteLine("task-{3} download {0} {2}/{1}", part, parts.Count, local + 1, index);
                                    var partFile = Path.Combine(_tempPath, $"{videoId}-{local}.ts");
                                    using (var file = File.Open(partFile, FileMode.OpenOrCreate, FileAccess.Write))
                                    {
                                        // Resume: continue after the bytes already on disk.
                                        if (file.Length > 0)
                                        {
                                            file.Seek(file.Length, SeekOrigin.Begin);
                                        }

                                        var length = await client.HeadContentLength(partUrl);
                                        if (length < 0)
                                        {
                                            // Size unknown: put the segment back and try again later.
                                            downPartQueue.Enqueue(local);
                                            continue;
                                        }
                                        // Remaining bytes to fetch.
                                        length -= (int)file.Position;
                                        if (length == 0)
                                        {
                                            // Segment is already complete on disk.
                                            parts[local] = partFile;
                                            Console.WriteLine("download {0} success", Interlocked.Increment(ref successSize));
                                            await Task.Delay(100);
                                            continue;
                                        }
                                        var flag = true;
                                        using (var cancel = CancellationTokenSource.CreateLinkedTokenSource(commonCts.Token))
                                        {
                                            // When this segment is cancelled (timeout below) while more than
                                            // _parallelSize segments are still queued, cancel every worker.
                                            using (cancel.Token.Register(() =>
                                            {
                                                if (downPartQueue.Count <= _parallelSize)
                                                {
                                                    return;
                                                }
                                                commonCts.Cancel();
                                            }))
                                            {
                                                cancel.CancelAfter(_time);
                                                if (length <= 32 * 1024)
                                                {
                                                    // Small remainder: a single open-ended range request.
                                                    flag = await client.GetRangeContent(partUrl, file, (int)file.Position, null, cancel.Token);
                                                }
                                                else
                                                {
                                                    // Large remainder: split into equal ranges (the last range takes
                                                    // the rest) and fetch them concurrently into memory buffers.
                                                    var parallelStreams = parallelStreamGroup[index];
                                                    var partLength      = length / parallelStreams.Length;
                                                    var tasks           = new List <Task>();
                                                    var partFlags       = new bool[_parallelSize];
                                                    for (int i = 0, j = parallelStreams.Length - 1; i <= j; i++)
                                                    {
                                                        var pIndex      = i;
                                                        var stream      = parallelStreams[i];
                                                        stream.Position = 0;
                                                        stream.SetLength(0);
                                                        partFlags[i] = false;
                                                        int start    = partLength * i + (int)file.Position, end = (i == j ? length + (int)file.Position : start + partLength) - 1;
                                                        tasks.Add(Task.Run(async() =>
                                                        {
                                                            // Retry the range, resuming after the bytes already buffered,
                                                            // until it succeeds or the segment is cancelled.
                                                            while (flag)
                                                            {
                                                                var rangeOk = await client.GetRangeContent(partUrl, stream, start + (int)stream.Position, end, cancel.Token);
                                                                if (rangeOk)
                                                                {
                                                                    partFlags[pIndex] = true;
                                                                    return;
                                                                }
                                                                if (cancel.IsCancellationRequested)
                                                                {
                                                                    flag = false;
                                                                    return;
                                                                }
                                                                await Task.Delay(500);
                                                            }
                                                        }));
                                                    }
                                                    await Task.WhenAll(tasks);

                                                    // Append the contiguous prefix of completed ranges to the file. A
                                                    // partially filled first range is kept too, so that a later resume
                                                    // continues from the new file length.
                                                    for (int i = 0; i < parallelStreams.Length; i++)
                                                    {
                                                        var output = parallelStreams[i];
                                                        if (partFlags[i] || (i == 0 && output.Length > 0))
                                                        {
                                                            output.Position = 0;
                                                            await output.CopyToAsync(file);
                                                        }
                                                        else
                                                        {
                                                            break;
                                                        }
                                                    }
                                                }
                                            }
                                        }
                                        if (flag)
                                        {
                                            parts[local] = partFile;
                                            Console.WriteLine("download {0} success", Interlocked.Increment(ref successSize));
                                        }
                                        else if (downPartQueue.Count > _parallelSize)
                                        {
                                            // Too much is still outstanding: give up on this video.
                                            success = false;
                                            Console.WriteLine("download {0} fail", part);
                                        }
                                        else
                                        {
                                            // Nearly done: retry this segment.
                                            downPartQueue.Enqueue(local);
                                        }
                                    }
                                }
                            });

                            await Task.WhenAll(downTasks);
                        }

                        if (!success)
                        {
                            downPartQueue.Clear();
                            continue;
                        }
                        var path = Path.Combine(_storePath, $"{filePath}_{videoId}.ts");
                        // Shorten over-long names once. The previous loop threw for titles shorter
                        // than 50 characters and never terminated when the truncated path was still
                        // too long.
                        if (path.Length >= 255 && filePath.Length > 50)
                        {
                            filePath = filePath.Substring(0, 50);
                            path     = Path.Combine(_storePath, $"{filePath}_{videoId}.ts");
                        }

                        // Concatenate the segment files in playlist order into the final video.
                        using (var video = File.Create(path))
                        {
                            foreach (var part in parts)
                            {
                                if (string.IsNullOrEmpty(part) || !File.Exists(part))
                                {
                                    continue;
                                }

                                using (var fileReader = File.OpenRead(part))
                                {
                                    fileReader.CopyTo(video);
                                }
                            }
                        }

                        parts.ForEach(File.Delete);
                        Console.WriteLine("success downland {0}", videoId);
                        visitedUrls.Add(videoId);
                        File.AppendAllText(_visitePath, url + Environment.NewLine);
                    }
                    catch (Exception ex)
                    {
                        // Best effort: log and put the item back so it is retried later.
                        Console.WriteLine(ex);
                        urls.Enqueue(item);
                    }
                }
                if (!running)
                {
                    break;
                }
                // Phase 2: pick the next listing page, moving on to the next start page when the
                // current one has no pages left.
                if (!pages.TryPop(out var page))
                {
                    if (hasDown)
                    {
                        // Record the start page that produced work in the history file.
                        File.AppendAllLines(_startPathHistory, new[] { currentStart });
                    }
                    if (starts.TryDequeue(out currentStart))
                    {
                        hasDown = false;
                        // The first-page URL depends on the kind of start page.
                        if (currentStart.Contains("/tags/", StringComparison.OrdinalIgnoreCase))
                        {
                            page = currentStart + "/0";
                        }
                        else if (currentStart.Contains("/?k=", StringComparison.OrdinalIgnoreCase))
                        {
                            page = currentStart + "&p=0";
                        }
                        else
                        {
                            page = currentStart + "/videos/best/0";
                            pages.Push(currentStart + "/favorites/0");
                        }
                    }
                    else
                    {
                        // No start pages left. In non-deep mode load the recorded video items once
                        // more and keep going if that produced work; otherwise finish.
                        if (!deep)
                        {
                            LoadVideoItems(visitedUrls, urls);
                            if (urls.Count > 0)
                            {
                                continue;
                            }
                        }
                        break;
                    }
                }

                if (!filter.Contains(page))
                {
                    if (downSize == 0)
                    {
                        // No video page was fetched since the previous listing page: pause briefly
                        // between back-to-back listing requests.
                        await Task.Delay(1000);
                    }
                    Console.WriteLine(page);
                    // Strip the trailing page number to get the prefix for sibling pages.
                    var parentUrl = Regex.Replace(page, @"(\d+$)", string.Empty, RegexOptions.Compiled);
                    var pageRes   = (await client.GetStringOrNullAsync(page));
                    if (!string.IsNullOrEmpty(pageRes.Item2))
                    {
                        var html        = HttpUtility.HtmlDecode(pageRes.Item2);
                        // Pagination links of the form href="#<n>".
                        var pageMatches = Regex.Matches(html, @"href=""#(\d+)""", RegexOptions.Compiled);
                        foreach (var pageMatch in pageMatches.Select(m => m.Groups[1].Value).Distinct())
                        {
                            var pageUrl = parentUrl + pageMatch;
                            if (filter.Contains(pageUrl))
                            {
                                continue;
                            }
                            pages.Push(pageUrl);
                        }
                        // Favorite lists.
                        pageMatches = Regex.Matches(html, @"href=""(/favorite/\d+/.+?)""", RegexOptions.Compiled);
                        foreach (var pageMatch in pageMatches.Select(m => m.Groups[1].Value).Distinct())
                        {
                            pages.Push(baseHost + pageMatch);
                        }
                        // Search result pages.
                        pageMatches = Regex.Matches(html, @"href=""(/\?k=[^""]+?&p=\d+)""", RegexOptions.Compiled);
                        foreach (var pageMatch in pageMatches.Select(m => m.Groups[1].Value).Distinct())
                        {
                            pages.Push(baseHost + pageMatch);
                        }
                        // Video links on this page are queued at the default depth and recorded.
                        var matches   = Regex.Matches(html, @"href=""(/prof-video-click/.+?|/video\d+/.+?)""", RegexOptions.Compiled);
                        var pageItems = matches.Where(m => m.Success)
                                        .Select(m => m.Groups[1].Value)
                                        .Distinct();
                        foreach (var pageItem in pageItems)
                        {
                            urls.Enqueue(new Link()
                            {
                                Url = pageItem
                            });
                        }
                        File.AppendAllLines(_startItemPath, pageItems);
                        // Remember the page so it is not crawled again in this run.
                        filter.Add(page);
                    }
                }
                downSize = 0;
            }

            Console.WriteLine("over");
        }