示例#1
0
 /// <summary>
 /// Downloads the page at <paramref name="url"/> on a thread-pool thread and
 /// collects the link targets found in its <c>href</c> attributes.
 /// </summary>
 /// <param name="url">Address of the page to download and scan.</param>
 /// <returns>
 /// A task producing an <c>InspectResult</c> whose <c>selfLink</c> is
 /// <paramref name="url"/>. On success, <c>links</c> holds every matched href
 /// (relative ones resolved against <paramref name="url"/>) and <c>length</c> is
 /// the character count of the downloaded source. If the download or the scan
 /// throws, a message is written to the console and the result is returned with
 /// whatever was collected up to that point.
 /// </returns>
 private Task <InspectResult> InspectPageAsync(string url)
 {
     return(Task.Run(() =>
     {
         InspectResult iRes = new InspectResult {
             selfLink = url
         };
         // Captures absolute http(s) links, plus relative links ending in .htm/.html/.php.
         string hrefPattern = "href\\s*=\\s*(?:[\"'])(http[^\"']*|[^\"']*\\.html?|[^\"']*\\.php\\??)(?:[\"'])";
         try
         {
             string source;
             // WebClient is IDisposable; release it as soon as the download is done.
             using (WebClient client = new WebClient())
             {
                 source = client.DownloadString(url);
             }

             // Base used to resolve relative hrefs; resolution is skipped if url is not absolute.
             Uri baseUri;
             bool hasBaseUri = Uri.TryCreate(url, UriKind.Absolute, out baseUri);

             Match m = Regex.Match(source, hrefPattern,
                                   RegexOptions.IgnoreCase | RegexOptions.Compiled,
                                   TimeSpan.FromSeconds(1));
             while (m.Success)
             {
                 var link = m.Groups[1].Value;
                 // The regex is case-insensitive, so the scheme check must be too;
                 // ordinal comparison keeps it independent of the current culture.
                 if (!link.StartsWith("http", StringComparison.OrdinalIgnoreCase))
                 {
                     Uri resolved;
                     if (hasBaseUri && Uri.TryCreate(baseUri, link, out resolved))
                     {
                         // Proper relative-reference resolution (handles "/x", "../x", "x.html").
                         link = resolved.AbsoluteUri;
                     }
                     else
                     {
                         // Fallback: previous behaviour, append to the URL without its query string.
                         link = url.Split('?')[0] + link;
                     }
                 }
                 iRes.links.Add(link);
                 m = m.NextMatch();
             }
             iRes.length = source.Length;
         }
         catch (Exception)
         {
             // Best-effort crawl: any download/regex failure is reported and the
             // partially filled result is still returned to the caller.
             Console.WriteLine("Unrezolvable path occured");
         }
         return iRes;
     }));
 }
示例#2
0
        /// <summary>
        /// Inspects the page at <paramref name="url"/> and, while
        /// <paramref name="depth"/> is positive, recursively inspects every link
        /// found on it.
        /// </summary>
        /// <param name="url">Address of the page to inspect.</param>
        /// <param name="depth">
        /// Number of link levels still to follow; at zero or below only the page
        /// itself is inspected.
        /// </param>
        /// <returns>
        /// The inspection result for <paramref name="url"/>, with the results of
        /// its linked pages appended to <c>children</c> in link order.
        /// </returns>
        public async Task <InspectResult> GoInspectAsync(string url, int depth)
        {
            InspectResult page = await InspectPageAsync(url);

            // Nothing left to follow: return the page on its own.
            if (depth <= 0)
            {
                return page;
            }

            // Start every child inspection before awaiting any of them.
            var pending = new List <Task <InspectResult> >();
            foreach (var childUrl in page.links)
            {
                Task <InspectResult> childTask = GoInspectAsync(childUrl, depth - 1);
                pending.Add(childTask);
            }

            // Collect the results in the same order the links were discovered.
            for (int i = 0; i < pending.Count; i++)
            {
                InspectResult child = await pending[i];
                page.children.Add(child);
            }

            return page;
        }