Ejemplo n.º 1
0
        /// <summary>
        /// Worker loop: while <c>DoProcessing</c> is true, takes the next task from
        /// <c>QueueManager</c>, runs it through <c>ProcessUrl</c> and raises
        /// <c>OnChildLinkProcessed</c> when that call returns a non-empty list.
        /// </summary>
        /// <remarks>
        /// No exception leaves the loop, so a single failing task cannot stop the worker.
        /// Failures are reported through <c>System.Diagnostics.Trace</c> rather than being
        /// silently discarded.
        /// </remarks>
        private void ProcessingFunction()
        {
            while (DoProcessing)
            {
                try
                {
                    TaskData taskData = QueueManager.GetTask();
                    if (taskData == null)
                    {
                        // Nothing to do right now; wait a little before polling the queue again.
                        Thread.Sleep(250);
                        continue;
                    }

                    var candidates = ProcessUrl(taskData);

                    if (candidates != null && candidates.Count > 0)
                    {
                        OnChildLinkProcessed?.Invoke(candidates);
                    }
                }
                catch (Exception ex)
                {
                    // Keep the worker alive (best effort), but leave a trace of what went wrong
                    // instead of swallowing the exception without any record.
                    System.Diagnostics.Trace.TraceError("ProcessingFunction: task processing failed: {0}", ex);
                }
            }
        }
Ejemplo n.º 2
0
 /// <summary>
 /// Forwards <paramref name="link"/> to <c>OnLinkProcessed</c>; does nothing when
 /// <c>OnLinkProcessed</c> is null.
 /// </summary>
 /// <param name="link">The task passed on unchanged to <c>OnLinkProcessed</c>.</param>
 private void OnLinkProcessedHandler(TaskData link)
 {
     // Read the delegate once so the null check and the call see the same value.
     var handler = OnLinkProcessed;
     if (handler != null)
     {
         handler.Invoke(link);
     }
 }
Ejemplo n.º 3
0
        /// <summary>
        /// Downloads the resource behind <paramref name="taskData"/>, records its content type
        /// and response length on the task's link and, while the depth limit allows it, queues
        /// one child task for every link extracted from the response.
        /// </summary>
        /// <param name="taskData">
        /// The task to process. Its <c>Link</c>, <c>ChildTasks</c> and <c>IsDone</c> members are
        /// updated in place.
        /// </param>
        /// <returns>
        /// The links of the child tasks queued by this call; empty when nothing was queued.
        /// </returns>
        /// <remarks>
        /// Failures while building, sending or parsing the request are not rethrown: the link's
        /// <c>ContentType</c> is set to <c>"failed"</c> instead. In every case the task is marked
        /// done and <c>OnLinkProcessed</c> is raised before returning.
        /// </remarks>
        private List<Link> ProcessUrl(TaskData taskData)
        {
            var result = new List<Link>();

            try
            {
                // Parsed inside the try block so that a malformed URL marks the task as failed
                // instead of escaping before IsDone is set and OnLinkProcessed is raised.
                var domain = (new Uri(taskData.Link.Url)).Host;

                WebRequest request = WebRequest.Create(taskData.Link.Url);
                request.Credentials = CredentialCache.DefaultCredentials;

                // User-Agent is a restricted header on HttpWebRequest: on .NET Framework it must
                // be set through the UserAgent property, adding it to Headers throws.
                const string userAgent = "PostmanRuntime/7.24.0";
                var httpRequest = request as HttpWebRequest;
                if (httpRequest != null)
                {
                    httpRequest.UserAgent = userAgent;
                }
                else
                {
                    request.Headers.Add("User-Agent", userAgent);
                }

                string responseString;

                // Dispose the response so the underlying connection is released.
                using (WebResponse response = request.GetResponse())
                {
                    taskData.Link.ContentType = response.ContentType;

                    // Read the whole response body as text.
                    using (var reader = new StreamReader(response.GetResponseStream()))
                    {
                        responseString = reader.ReadToEnd();
                    }
                }

                taskData.Link.ResponseLength = responseString.Length;

                int maxDepth = WebCrawler.Instance.settings.MaxDepth;

                // Only look for deeper links while the task is within the configured depth.
                // Children are queued with DepthLevel + 1, so a task one level past maxDepth
                // is still downloaded but is not expanded any further.
                if (taskData.DepthLevel <= maxDepth)
                {
                    var raw        = HtmlAgilityPack(responseString);
                    var childLinks = MakeAbsolutUrls(raw, domain);

                    foreach (var linkUrl in childLinks)
                    {
                        var childLink = new Link()
                        {
                            Url = linkUrl
                        };
                        TaskData newTask = new TaskData()
                        {
                            DepthLevel = taskData.DepthLevel + 1,
                            IsDone     = false,
                            Link       = childLink
                        };
                        QueueManager.AddTask(newTask);

                        taskData.ChildTasks.Add(newTask);

                        // Report the queued child to the caller; previously the returned list
                        // was never filled and therefore always came back empty.
                        result.Add(childLink);
                    }
                }
            }
            catch (Exception)
            {
                // Covers WebException as well as any other failure during download or parsing.
                taskData.Link.ContentType = "failed";
            }
            finally
            {
                taskData.IsDone = true;
            }

            OnLinkProcessed?.Invoke(taskData);

            return result;
        }