예제 #1
0
 /// <summary>
 /// Forwards a processed task to <c>OnLinkProcessed</c>.
 /// Does nothing when <c>OnLinkProcessed</c> is null.
 /// </summary>
 /// <param name="link">The task data passed through to <c>OnLinkProcessed</c>.</param>
 private void OnLinkProcessedHandler(TaskData link) => OnLinkProcessed?.Invoke(link);
예제 #2
0
        /// <summary>
        /// Downloads the page at <c>taskData.Link.Url</c>, records its content type and
        /// body length (in characters) on the link, and, while the task's depth level
        /// does not exceed the configured <c>MaxDepth</c>, queues one child task per
        /// link found in the page.
        /// </summary>
        /// <remarks>
        /// Any exception raised while parsing the URL, downloading or extracting links
        /// is swallowed and reported by setting <c>taskData.Link.ContentType</c> to
        /// <c>"failed"</c>. <c>taskData.IsDone</c> is always set to <c>true</c>, and
        /// <c>OnLinkProcessed</c> is invoked afterwards.
        /// </remarks>
        /// <param name="taskData">The crawl task to process; it is mutated in place.</param>
        /// <returns>
        /// A list that is created empty and never populated by this method. Child links
        /// are exposed through <c>taskData.ChildTasks</c> instead.
        /// NOTE(review): confirm whether callers expect the discovered links here.
        /// </returns>
        private List <Link> ProcessUrl(TaskData taskData)
        {
            const string userAgent = "PostmanRuntime/7.24.0";

            var result = new List<Link>();

            try
            {
                // Parsed inside the try so a malformed URL is reported as "failed"
                // like any other error, instead of escaping before IsDone is set.
                string domain = new Uri(taskData.Link.Url).Host;

                WebRequest request = WebRequest.Create(taskData.Link.Url);
                request.Credentials = CredentialCache.DefaultCredentials;

                // User-Agent is a restricted header on .NET Framework: adding it through
                // Headers throws ArgumentException, so HTTP requests use the property.
                var httpRequest = request as HttpWebRequest;
                if (httpRequest != null)
                {
                    httpRequest.UserAgent = userAgent;
                }
                else
                {
                    request.Headers.Add("User-Agent", userAgent);
                }

                string responseString;

                // Dispose the response so the underlying connection is released.
                using (WebResponse response = request.GetResponse())
                {
                    taskData.Link.ContentType = response.ContentType;

                    // Read the whole response body.
                    using (var reader = new StreamReader(response.GetResponseStream()))
                    {
                        responseString = reader.ReadToEnd();
                    }
                }

                taskData.Link.ResponseLength = responseString.Length;

                int maxDepth = WebCrawler.Instance.settings.MaxDepth;

                // If the depth has not exceeded the target, look for links one level deeper.
                if (taskData.DepthLevel <= maxDepth)
                {
                    // Parse the page and resolve the links found against the page's host.
                    var raw        = HtmlAgilityPack(responseString);
                    var childLinks = MakeAbsolutUrls(raw, domain);

                    foreach (var linkUrl in childLinks)
                    {
                        var newTask = new TaskData()
                        {
                            DepthLevel = taskData.DepthLevel + 1,
                            IsDone     = false,
                            Link       = new Link()
                            {
                                Url = linkUrl
                            }
                        };
                        QueueManager.AddTask(newTask);

                        taskData.ChildTasks.Add(newTask);
                    }
                }
            }
            catch (Exception)
            {
                // Best-effort crawl: every failure (network or otherwise) only marks the link.
                taskData.Link.ContentType = "failed";
            }
            finally
            {
                taskData.IsDone = true;
            }

            OnLinkProcessed?.Invoke(taskData);

            return result;
        }