//======================Main Function======================
/// <summary>
/// Downloads the Chinese medicine formula images: walks the paged index at
/// sys02.lib.hkbu.edu.hk/cmfid, and for each page saves up to four images into
/// <c>MTsFilePath</c>, naming every file after the caption text found next to it.
/// </summary>
/// <remarks>
/// Pages that expose fewer captions or images than expected are processed as far as
/// the data goes instead of aborting the whole run with an index-out-of-range error.
/// </remarks>
private static void GetImageFromWeb2()
{
    const int totalPageCount = 46;   // total number of index pages
    const int maxItemsPerPage = 4;   // the original code reads the first four entries of each page

    // Collected per page but not consumed anywhere inside this method.
    List<Medicine> medicineList = new List<Medicine>();

    // CreateDirectory does nothing when the directory already exists.
    Directory.CreateDirectory(MTsFilePath);

    for (int pageId = 1; pageId <= totalPageCount; pageId++)
    {
        Console.WriteLine($"开始 第 {pageId} 页下载任务.");

        // Build the request address for this page.
        var pageUrl = $"https://sys02.lib.hkbu.edu.hk/cmfid/index.asp?query=&lang=chs&pageid={pageId}";

        // Load the page HTML (AnalyticalContent.GetHtml is defined elsewhere; presumably an HTTP GET)
        // and parse it into a document object.
        var sourceHtmlDom = AnalyticalContent.GetHtml(pageUrl);
        var dom = htmlParser.ParseDocument(sourceHtmlDom);

        // Captions and thumbnails are matched by position.
        var textItems = dom.QuerySelectorAll("table#main_content_tb tbody tr td font a");
        var imgItems = dom.QuerySelectorAll("table#main_content_tb tbody tr td a img");

        Medicine medicine = new Medicine
        {
            text = new List<string>(),
            img = new List<string>()
        };

        // Never index past what the page actually contains.
        int itemCount = Math.Min(maxItemsPerPage, Math.Min(textItems.Length, imgItems.Length));

        for (int i = 0; i < itemCount; i++)
        {
            var caption = textItems[i].InnerHtml;
            var src = imgItems[i].GetAttribute("src");
            if (string.IsNullOrEmpty(src))
            {
                // No image address: nothing to download for this entry.
                continue;
            }

            var imgUrl = "https://sys02.lib.hkbu.edu.hk/cmfid/" + src;
            medicine.text.Add(caption);
            medicine.img.Add(imgUrl);

            // The caption comes from raw inner HTML, so replace characters that are not legal
            // in a file name; names without such characters are left unchanged.
            var rawFileName = caption + Path.GetExtension(imgUrl);
            var oneFileName = string.Join("_", rawFileName.Split(Path.GetInvalidFileNameChars()));

            // FinallyPath is declared outside this method; it is kept updated as before.
            FinallyPath = Path.Combine(MTsFilePath, oneFileName);
            AnalyticalContent.GetImgRes(imgUrl, FinallyPath);
        }

        medicineList.Add(medicine);

        Console.WriteLine($"结束 第 {pageId} 页下载任务.");
        Console.WriteLine("===========================================");
    }
}
/// <summary>
/// The earlier "microscopic identification" downloader: for each record id it loads the
/// cmmid_micro_uat.jsp page on libproject.hkbu.edu.hk, derives the resource name from the
/// <c>p.text2</c> elements, writes the text of the <c>p.text</c> elements to
/// <c>{MTsFilePath}/{name}/{name}.txt</c>, and saves every image whose markup contains
/// <c>trsimage/mmd/micro</c> as <c>1.ext</c>, <c>2.ext</c>, ... in the same folder.
/// </summary>
private static void GetImageFromWeb()
{
    // The original note says valid ids are 001-421; this run deliberately starts at 409.
    const int firstId = 409;
    const int lastId = 421;

    // CreateDirectory does nothing when the directory already exists.
    Directory.CreateDirectory(MTsFilePath);

    for (int j = firstId; j <= lastId; j++)
    {
        Console.WriteLine($"第 {j} 次下载任务.");

        // Build the request address: ids are zero-padded to three digits (B00001 ... B00421).
        string pad = j.ToString().PadLeft(3, '0');
        var microUrl = $"http://libproject.hkbu.edu.hk/was40/function/cmmid_micro_uat.jsp?id=B00{pad}&lang=chs";

        // Load the HTML (AnalyticalContent.GetHtml is defined elsewhere; presumably an HTTP GET)
        // and parse it into a document object.
        var sourceHtmlDom = AnalyticalContent.GetHtml(microUrl);
        var dom = htmlParser.ParseDocument(sourceHtmlDom);

        var nameNodes = dom.QuerySelectorAll("p.text2");
        if (nameNodes == null)
        {
            continue;
        }

        foreach (var nameNode in nameNodes)
        {
            // Example from the original notes: "人参 Renshen" -> resource name "人参".
            var name = AnalyticalContent.HtmlToPlainText(nameNode.InnerHtml)?.Trim();
            if (string.IsNullOrWhiteSpace(name))
            {
                // A blank name would make the target folder collapse to MTsFilePath itself.
                continue;
            }

            var resourceName = name.Split(' ')[0];
            Console.WriteLine($"资源名称: {resourceName}");

            var secondDir = Path.Combine(MTsFilePath, resourceName);

            // ---- Bottom text -> {name}.txt ----
            var bottomNodes = dom.QuerySelectorAll("p.text");
            if (bottomNodes != null)
            {
                foreach (var textNode in bottomNodes)
                {
                    var txt = AnalyticalContent.HtmlToPlainText(textNode.InnerHtml) ?? string.Empty;
                    txt = txt.Split("本记录")[0];   // keep only what precedes the "本记录" marker
                    txt = txt.Replace(">", "");

                    Directory.CreateDirectory(secondDir);
                    var textPath = Path.Combine(secondDir, resourceName + ".txt");

                    // NOTE(review): CreateText starts a new file each time, so with several
                    // p.text elements only the last one's text remains - confirm this is intended.
                    using (StreamWriter sw = new FileInfo(textPath).CreateText())
                    {
                        sw.WriteLine(txt);
                    }
                }
            }

            // ---- Target images -> 1.ext, 2.ext, ... ----
            var images = dom.QuerySelectorAll("img");
            if (images == null)
            {
                continue;
            }

            int count = 0;
            foreach (var item in images)
            {
                // Only images whose markup mentions the micro image folder are wanted.
                if (!item.OuterHtml.Contains("trsimage/mmd/micro"))
                {
                    continue;
                }

                var src = item.GetAttribute("src");
                if (string.IsNullOrEmpty(src))
                {
                    // The marker matched some other part of the tag; there is no address to fetch.
                    continue;
                }

                // src is relative (the original notes show "../trsimage/mmd/B00421.jpg");
                // strip the ".." and prefix the host.
                var findSrc = "http://libproject.hkbu.edu.hk" + src.Replace("..", "");
                Console.WriteLine($"资源地址: {findSrc}");

                Directory.CreateDirectory(secondDir);

                count++;
                var oneFileName = count.ToString() + Path.GetExtension(findSrc);

                // FinallyPath is declared outside this method; it is kept updated as before.
                FinallyPath = Path.Combine(secondDir, oneFileName);
                AnalyticalContent.GetImgRes(findSrc, FinallyPath);
            }
        }
    }
}