Exemplo n.º 1
0
        async Task handle(string url)
        {
            if (VisitedUrl.Add(url))
            {
                var context = BrowsingContext.New(Configuration.Default.WithDefaultLoader());
                try
                {
                    var document = await context.OpenAsync(url);

                    foreach (var item in document.QuerySelectorAll("[href]"))
                    {
                        var u = fixUrl(item.GetAttribute("href"));
                        try
                        {
                            if (new Uri(u).Host.EndsWith(StartUrl.Host) && !PendingUrl.Contains(u) && !VisitedUrl.Contains(u))
                            {
                                PendingUrl.Enqueue(u);
                            }
                        }
                        catch (System.UriFormatException)
                        {
                            ErrorUrl.Add(u);
                        }
                    }

                    foreach (var item in document.QuerySelectorAll("[src]"))
                    {
                        var u = fixUrl(item.GetAttribute("src"));
                        if (FileTypes.Contains(u.Split('.')[^ 1]) && VisitedUrl.Add(u))
Exemplo n.º 2
0
 /// <summary>
 /// Prints the start URL and every configured file type to the console, seeds the
 /// pending queue with the start URL, and then hands pending URLs to
 /// <c>handle</c> one at a time until the queue is empty.
 /// </summary>
 /// <returns>A task that completes once no pending URLs remain.</returns>
 public async Task run()
 {
     // Banner: the start address followed by the list of file types.
     Console.WriteLine($"\n初始网址: {StartUrl}\n");
     Console.WriteLine("文件类型:");
     foreach (var entry in FileTypes)
     {
         Console.WriteLine($"    {entry}");
     }

     // Seed the work queue with the starting address.
     var seed = StartUrl.ToString();
     PendingUrl.Enqueue(seed);

     // Drain the queue strictly sequentially: each URL is fully awaited before
     // the next is dequeued, and the count is re-checked after every call.
     while (PendingUrl.Count > 0)
     {
         var next = PendingUrl.Dequeue();
         await handle(next);
     }
 }