Example #1
0
        /// <summary>
        /// Failure callback: prints the level, url and exception messages to the console in red,
        /// then passes the url to <c>CollectionStore.MoveUrl</c> with no local file name.
        /// </summary>
        /// <param name="processUrl">Url whose processing failed.</param>
        /// <param name="processLevel">Level associated with the url.</param>
        /// <param name="ex">Exception describing the failure.</param>
        public void OnFailed(string processUrl, int processLevel, Exception ex)
        {
            // Switch colour first so the state matches the original even if building the message throws.
            Console.ForegroundColor = ConsoleColor.Red;
            string failureReport = $"{processLevel}, {processUrl}, {CollectTools.GetExceptionMessages(ex)}";
            Console.WriteLine(failureReport);
            Console.ResetColor();

            // Original author's note: delete the url from "0" and move it to "1"
            // (presumably store/state identifiers inside CollectionStore — confirm there).
            var failedEntry = new CollectionStore.UrlModel
            {
                Url           = processUrl,
                Level         = processLevel,
                localFileName = null
            };

            CollectionStore.MoveUrl(rootUrl, failedEntry);
        }
Example #2
0
        /// <summary>
        /// Fetches the HTML for the configured url through <c>CollectTools.GetHtmlContent</c>,
        /// parses it into an <c>HtmlDocument</c> and stores the document root in <c>htmlNode</c>.
        /// </summary>
        /// <returns>The raw HTML text that was loaded.</returns>
        /// <exception cref="ArgumentNullException">The url field is null or empty.</exception>
        /// <exception cref="InvalidOperationException">No content was returned for the url.</exception>
        public string LoadHtml()
        {
            if (string.IsNullOrEmpty(this.url))
            {
                throw new ArgumentNullException("Url");
            }

            string htmlContent = CollectTools.GetHtmlContent(url, encoding);

            if (string.IsNullOrEmpty(htmlContent))
            {
                // InvalidOperationException derives from Exception, so existing catch (Exception)
                // handlers keep working while callers gain a more specific type to filter on.
                // The encoding is interpolated directly (no explicit ToString call) so a null
                // encoding cannot replace this report with a NullReferenceException.
                throw new InvalidOperationException($"Load html content by url = {url} and encoding = {encoding} failure.");
            }

            HtmlDocument document = new HtmlDocument();

            document.LoadHtml(htmlContent);

            // Keep the parsed root on the instance; the raw text is what the caller receives.
            this.htmlNode = document.DocumentNode;
            return htmlContent;
        }
Example #3
0
        /// <summary>
        /// Writes an extraction result to a new, GUID-named text file under a directory derived
        /// from the url-encoded <paramref name="rootUrl"/>.
        /// </summary>
        /// <param name="rootUrl">Root url; its url-encoded form is used as the directory name.</param>
        /// <param name="result">Extraction result to persist.</param>
        /// <returns>
        /// The path of the written file, or <c>null</c> when the result is missing, when a present
        /// "title" or "content" entry has no value, or when writing fails.
        /// </returns>
        public string Save(string rootUrl, ExtractResult result)
        {
            if (result?.ContentResults == null)
            {
                return null;
            }

            // A key that is present but has no value rejects the whole result; an absent key does not.
            foreach (string requiredKey in new[] { "title", "content" })
            {
                if (result.ContentResults.ContainsKey(requiredKey) && !result.ContentResults[requiredKey].HasValue())
                {
                    return null;
                }
            }

            string targetDirectory = $"E:\\hwl-collect\\{HttpUtility.UrlEncode(rootUrl)}";
            string targetFile      = $"{targetDirectory}\\{Guid.NewGuid().ToString()}.txt";

            try
            {
                if (!Directory.Exists(targetDirectory))
                {
                    Directory.CreateDirectory(targetDirectory);
                }

                // File layout: origin url, level, then the content serialized as JSON — one per line.
                string[] fileLines =
                {
                    result.OriginUrl,
                    result.Level.ToString(),
                    result.ConvertContentToJsonString()
                };

                File.AppendAllLines(targetFile, fileLines, Encoding.UTF8);
                return targetFile;
            }
            catch (Exception ex)
            {
                // Best-effort save: report the failure on the console and signal it with null.
                Console.ForegroundColor = ConsoleColor.Red;
                Console.WriteLine($"Save html content to path of {targetFile} failed, {CollectTools.GetExceptionMessages(ex)}.");
                Console.ResetColor();
                return null;
            }
        }