/// <summary>
/// Downloads the page at <paramref name="url"/>, extracts its main content and writes the
/// generated HTML file into <paramref name="filepath"/>.
/// </summary>
/// <param name="url">
/// Address of the page to fetch. Its file name without extension, combined with
/// <c>MainForm.options.SaveExtension</c>, becomes the output file name.
/// </param>
/// <param name="filepath">Directory that receives the output file.</param>
/// <remarks>
/// Returns silently when the download yields <c>null</c>. Any exception raised while
/// parsing or writing propagates to the caller unchanged. On success the
/// <c>MainForm.Sucess</c> counter is incremented and <c>MainForm.OnDownLoadSucess</c>
/// is invoked with the url.
/// </remarks>
public void OutHtmlToFlie(string url, string filepath)
{
    string contenthtml = HttpClient(url);
    if (contenthtml == null)
    {
        return;
    }

    // Built only once we know there is content to process.
    CaptureParameter param = getpara();
    string saveName = Path.GetFileNameWithoutExtension(url) + "." + MainForm.options.SaveExtension;

    // Hold here for as long as MainForm.threadSwitch is set.
    while (MainForm.threadSwitch)
    {
        Thread.Sleep(1);
    }

    // No try/catch: the previous handler only rethrew, so failures surface exactly as before.
    File.WriteAllText(Path.Combine(filepath, saveName), OutHtml(CapturesBody(contenthtml, param)));
    MainForm.Sucess++;
    MainForm.OnDownLoadSucess?.Invoke(url);
}
/// <summary>
/// Extracts the page title and the processed main content from an HTML page.
/// </summary>
/// <param name="html">Full HTML source of the page.</param>
/// <param name="param">
/// Selects the main content node (by element id when <c>GetMainMethod</c> is
/// <c>GetMainMethodEnum.Id</c>, otherwise by the XPath in <c>GetMainString</c>) and
/// lists the node operations passed to <c>capture.CaptureFun</c> for every descendant.
/// </param>
/// <returns>
/// A two-element array: index 0 is the inner HTML of the page's title element (empty
/// when the page has none), index 1 is the markup of the processed main content.
/// </returns>
/// <exception cref="InvalidOperationException">
/// The main content node described by <paramref name="param"/> is not present in the page.
/// </exception>
public string[] CapturesBody(string html, CaptureParameter param)
{
    HtmlDocument doc = new HtmlDocument();
    doc.LoadHtml(html);

    HtmlNode title = doc.DocumentNode.SelectSingleNode("//title");

    HtmlNode sourceBody = param.GetMainMethod == GetMainMethodEnum.Id
        ? doc.GetElementbyId(param.GetMainString)
        : doc.DocumentNode.SelectSingleNode(param.GetMainString);
    if (sourceBody == null)
    {
        // Both lookups return null when nothing matches; fail with a message that says so.
        throw new InvalidOperationException("Main content node not found: " + param.GetMainString);
    }

    // All edits below happen on a separate document built from the main node's markup.
    HtmlDocument workingDoc = new HtmlDocument();
    workingDoc.LoadHtml(sourceBody.OuterHtml);

    bool saveImages = MainForm.options.SaveImage;
    string imageDir = MainForm.options.SavePath + "\\images\\";
    bool imageDirReady = false;

    // Snapshot the descendants so the loop does not walk the live tree while nodes are edited.
    HtmlNode[] nodes = workingDoc.DocumentNode.Descendants().ToArray();
    foreach (HtmlNode node in nodes)
    {
        if (saveImages && node.Name == "img" && node.Attributes.Contains("src"))
        {
            string src = node.Attributes["src"].Value;
            if (!string.IsNullOrEmpty(src))
            {
                string imageFileName = Path.GetFileName(src);

                // Create the images folder once, on first use; CreateDirectory is a no-op
                // when the folder already exists.
                if (!imageDirReady)
                {
                    Directory.CreateDirectory(imageDir);
                    imageDirReady = true;
                }

                // Fetch the image on a background thread, keyed by its original address.
                Thread downloader = new Thread(new ParameterizedThreadStart(DownloadImage));
                downloader.IsBackground = true;
                downloader.Start(src);

                // Point the tag at the local copy instead of the remote address.
                node.Attributes.Remove("src");
                node.SetAttributeValue("src", imageDir + imageFileName);
            }
        }

        foreach (var operation in param.Nodeoperate)
        {
            capture.CaptureFun(node, operation);
        }
    }

    // NOTE(review): this trims the first child of the node in the SOURCE document, not in
    // the working copy that the result is taken from, so it does not change the returned
    // markup. Behaviour kept as-is (now null-safe) — confirm whether the copy was intended.
    HtmlNode firstChild = sourceBody.FirstChild;
    if (firstChild != null && firstChild.OuterHtml.Length <= 1)
    {
        firstChild.Remove();
    }

    string titleHtml = title?.InnerHtml ?? string.Empty;
    string result = workingDoc.DocumentNode.InnerHtml;
    return new string[] { titleHtml, result };
}
/// <summary>
/// Builds the fixed capture configuration: the main node is looked up by the element id
/// "mainBody", and three node operations are registered in this order —
/// <c>MethodEnum.RemoveAll</c>, <c>MethodEnum.Remove</c>, <c>MethodEnum.DelAttribute</c>.
/// </summary>
/// <returns>A freshly created <c>CaptureParameter</c> holding those settings.</returns>
public CaptureParameter getpara()
{
    // Each entry is a three-slot string list. The slots look like (tag name, attribute
    // name, attribute value), with null apparently meaning "not constrained" —
    // TODO confirm against capture.CaptureFun.
    Func<string, string, string, parameterlist> entry =
        (first, second, third) => new parameterlist { parameter = new List<string> { first, second, third } };

    NodeOperate removeAll = new NodeOperate
    {
        method = MethodEnum.RemoveAll,
        parameterlist = new List<parameterlist>
        {
            // One entry names both the toolbar and the container-tabs classes; the
            // separate container-tabs entry that used to follow it was disabled.
            entry(null, "class", "codeSnippetToolBar|codeSnippetContainerTabs"),
            entry(null, "class", "LW_CollapsibleArea_Anchor_Div"),
            entry(null, "class", "LW_CollapsibleArea_HrDiv"),
            entry(null, "class", "cl_CollapsibleArea_expanding")
        }
    };

    NodeOperate removeKeep = new NodeOperate
    {
        method = MethodEnum.Remove,
        parameterlist = new List<parameterlist>
        {
            entry(null, "class", "LW_CollapsibleArea_TitleAhref"),
            entry(null, "class", "LW_CollapsibleArea_Title"),
            entry(null, "class", "codeSnippetContainerCodeContainer"),
            entry(null, "class", "codeSnippetContainer"),
            entry(null, "class", "codeSnippetContainerCode"),
            entry(null, "class", "sectionblock"),
            entry(null, "class", "introduction"),
            entry("div", "class", "section"),
            entry("div", null, null),
            entry("span", "class", "sentence"),
            entry("sentencetext", null, "sentence")
        }
    };

    NodeOperate delAttribute = new NodeOperate
    {
        method = MethodEnum.DelAttribute,
        parameterlist = new List<parameterlist>
        {
            entry("strong", "xmlns", null),
            entry("img", "xmlns", null),
            entry("img", "id", null),
            entry("span", "xmlns", null),
            entry("h2", "class", null),
            entry("h2", "xmlns:xlink", null),
            // NOTE(review): the space inside "xmlns: html" looks unintended (compare
            // "xmlns:xlink" above); kept exactly as it was — confirm before changing.
            entry("h2", "xmlns: html", null)
        }
    };

    CaptureParameter config = new CaptureParameter
    {
        Nodeoperate = new List<NodeOperate> { removeAll, removeKeep, delAttribute },
        GetMainMethod = GetMainMethodEnum.Id,
        GetMainString = "mainBody"
    };
    return config;
}