/// <summary>
/// Prepares an update run for a novel that is already stored, starts the
/// background worker and shows the modal progress dialog.
/// </summary>
/// <param name="log">Catalog entry whose Id and Url identify the novel.</param>
/// <param name="doc">Parsed catalog page; its dd elements become the chapter node list.</param>
private void UpdateNovel(Catalog log, HtmlDocument doc)
{
    col = null;

    using (CrawelNovelDbContext db = new CrawelNovelDbContext())
    {
        // Boundary chapter URL among the chapters stored for this novel:
        // the greatest one when rbBQG is checked, the smallest one otherwise.
        var storedChapters = db.Chapter.Where(c => c.NoteBookId.Equals(log.Id));
        string boundaryUrl = rbBQG.Checked
            ? storedChapters.Max(c => c.ChapterUrl)
            : storedChapters.Min(c => c.ChapterUrl);

        var boundaryChapter = db.Chapter.FirstOrDefault(f => f.ChapterUrl.Equals(boundaryUrl));

        col = doc.DocumentNode.SelectNodes("//dd");
        if (rbJPXS.Checked)
        {
            // For this source the first and the last dd entries are dropped.
            col.RemoveAt(0);
            col.RemoveAt(col.Count - 1);
        }

        // Publish the state the worker reads.
        ProgressCount = col.Count;
        CataUrl = log.Url;
        CataId = log.Id;

        // Start the background worker, handing it the boundary chapter.
        this.backgroundWorker1.RunWorkerAsync(boundaryChapter);
    }

    // Show the progress dialog while the worker runs.
    ProgressForm progressDialog = new ProgressForm(this.backgroundWorker1);
    progressDialog.ShowDialog(this);
}
/// <summary>
/// Downloads the page at sourceUrl and refills newsItems with the parsed
/// articles linked from the "lenta_news__day" block.
/// </summary>
/// <returns>
/// true when the listing was processed; false when the page could not be
/// loaded or the expected news container is missing.
/// </returns>
public bool StartParsing()
{
    // Same cut-off as before: the loop stops once count exceeds 8, i.e. after 9 items.
    const int maxItems = 9;

    newsItems.Clear();

    Uri sourceUri;
    WebPage page;
    try
    {
        sourceUri = new Uri(sourceUrl);
        page = web.NavigateToPage(sourceUri);
    }
    catch
    {
        // Any navigation/URL failure is reported to the caller as "nothing parsed".
        return false;
    }

    HtmlNode node = page.Html.SelectSingleNode("//div[@class='lenta_news__day']");
    if (node == null)
    {
        // Layout changed or empty page: previously this crashed with a NullReferenceException.
        return false;
    }

    // Built once instead of re-parsing sourceUrl for every article.
    string linkPrefix = "https://" + sourceUri.Host;

    HtmlNodeCollection childNodes = node.ChildNodes;
    int count = 0;

    // The first two child nodes are skipped (the original removed them from the collection).
    for (int i = 2; i < childNodes.Count && count < maxItems; i++)
    {
        HtmlNode child = childNodes[i];
        if (child.Name == "#text")
        {
            continue;
        }

        if (child.ChildNodes.Count < 2)
        {
            // No second child to take the href from; skip instead of throwing.
            continue;
        }

        string href = child.ChildNodes[1].GetAttributeValue("href", "");
        newsItems.Add(ParseWebPage(linkPrefix + href));
        count++;
    }

    return true;
}
/// <summary>
/// Trims a chapter node list in place: drops <paramref name="startNode"/> entries
/// from the front and <paramref name="endNode"/> entries from the back.
/// </summary>
/// <param name="chapList">Collection to trim; it is modified and returned.</param>
/// <param name="startNode">Number of leading entries to remove.</param>
/// <param name="endNode">Number of trailing entries to remove.</param>
/// <returns>The same collection instance after trimming.</returns>
private HtmlNodeCollection RemoveSurplusChap(HtmlNodeCollection chapList, int startNode, int endNode)
{
    int leadingLeft = startNode;
    while (leadingLeft > 0)
    {
        chapList.RemoveAt(0);
        leadingLeft--;
    }

    int trailingLeft = endNode;
    while (trailingLeft > 0)
    {
        int lastIndex = chapList.Count - 1;
        chapList.RemoveAt(lastIndex);
        trailingLeft--;
    }

    return chapList;
}
/// <summary>
/// Searches the book site for the title typed into txtBookName and appends one
/// ListViewItem per result row to listView.
/// </summary>
private void SoBookData()
{
    var title = this.txtBookName.Text;

    // The title is user input: escape it so characters such as '&', '#' or '+'
    // cannot break or alter the query string.
    string url = "https://so.biqusoso.com/s.php?ie=utf-8&siteid=biqukan.com&q=" + System.Uri.EscapeDataString(title);

    // Load the result page from the URL.
    HtmlWeb web = new HtmlWeb();
    HtmlAgilityPack.HtmlDocument doc = web.Load(url);

    HtmlNode headNode = doc.DocumentNode.SelectSingleNode("//ul");
    if (headNode == null)
    {
        return;
    }

    // SelectNodes yields null (not an empty collection) when nothing matches.
    HtmlNodeCollection aCollection = headNode.SelectNodes("li");
    if (aCollection == null || aCollection.Count <= 0)
    {
        return;
    }

    // The first li is dropped before the rows are read.
    aCollection.RemoveAt(0);

    foreach (var item in aCollection)
    {
        // Query the spans once per row instead of four times.
        HtmlNodeCollection spans = item.SelectNodes("span");
        if (spans == null || spans.Count < 3)
        {
            continue;
        }

        HtmlNode link = spans[1].SelectSingleNode("a");
        if (link == null || link.Attributes["href"] == null)
        {
            // Row without a usable link: skip it instead of crashing.
            continue;
        }

        ListViewItem row = new ListViewItem();
        row.SubItems[0].Text = spans[0].InnerText;
        row.SubItems.Add(spans[1].InnerText);
        row.SubItems.Add(spans[2].InnerText);
        row.SubItems.Add(link.Attributes["href"].Value);
        listView.Add(row);
    }
}
/// <summary>
/// Builds the recommendation list from the collected ids and then raises
/// swapScreen with a new SecondWindow.
/// </summary>
public void getAnimeList()
{
    // Upper bound on how many recommendations are requested.
    const int recommendationLimit = 15;

    // Ids of the candidate anime.
    List<String> ids = getInfo.TblGetIDString(collector.sort_remove_select_gen());

    // Clear whatever a previous attempt left in the recommendation table.
    getInfo.resetRecommendationTable();

    HtmlNodeCollection nodes = getSearchNode(ids);
    if (nodes != null)
    {
        nodes.RemoveAt(0);
        extractNodes(nodes, recommendationLimit);

        // Top up with the per-group searches when the combined search fell short.
        int shortfall = recommendationLimit - nodes.Count;
        if (shortfall > 0)
        {
            extract_seperateNodes(ids, (int)Math.Round((double)shortfall / 3));
        }
    }
    else
    {
        // No combined result at all: rely on the per-group searches only.
        extract_seperateNodes(ids, recommendationLimit / 3);
    }

    swapScreen?.Invoke(this, new SecondWindow());
}
/// <summary>
/// Fills _cachedElements. When "elements.json" exists the list comes from
/// Parse(); otherwise the periodic-table page is downloaded and each table row
/// is converted into an Element.
/// </summary>
/// <exception cref="System.InvalidOperationException">
/// The downloaded page does not contain the expected element table.
/// </exception>
public static void FetchScienceIl()
{
    if (File.Exists("elements.json"))
    {
        _cachedElements = new List<Element>(Parse());
        return;
    }

    const string elementsUrl = "http://www.science.co.il/PTelements.asp?s=Weight";
    const string elementsPath = "//table[@class='tabint8']/tr";

    var results = new List<Element>();

    using (var wc = new WebClient())
    {
        string src = wc.DownloadString(elementsUrl);
        var doc = new HtmlDocument();
        doc.LoadHtml(src);

        HtmlNodeCollection rows = doc.DocumentNode.SelectNodes(elementsPath);
        if (rows == null)
        {
            // SelectNodes yields null when nothing matches; fail with a clear message
            // instead of a NullReferenceException.
            throw new System.InvalidOperationException("Element table not found at " + elementsUrl);
        }

        // The first and the last rows are not element rows and are dropped.
        rows.RemoveAt(0);
        rows.RemoveAt(rows.Count - 1);

        foreach (var row in rows)
        {
            HtmlNode[] cells = row.Elements("td").ToArray();

            Element element = new Element();
            // Numbers are parsed with the invariant culture so the result does not
            // depend on the regional settings of the machine running the scraper.
            element.AtomicWeight = float.Parse(cells[2].InnerText, System.Globalization.CultureInfo.InvariantCulture);
            element.Name = cells[3].InnerText;
            element.Symbol = cells[4].InnerText;
            element.MeltingPoint = ParseScienceIlFloatOrZero(cells[5].InnerText);
            element.BoilingPoint = ParseScienceIlFloatOrZero(cells[6].InnerText);
            element.Density = ParseScienceIlFloatOrZero(cells[7].InnerText);
            element.AbundanceInEarth = ParseScienceIlFloatOrZero(cells[8].InnerText);
            element.Group = int.Parse(cells[10].InnerText, System.Globalization.CultureInfo.InvariantCulture);
            element.FirstIonizationEnergy = ParseScienceIlFloatOrZero(cells[12].InnerText);
            results.Add(element);
        }
    }

    _cachedElements = results;
}

/// <summary>Parses a table cell as an invariant-culture float; an empty cell counts as 0.</summary>
private static float ParseScienceIlFloatOrZero(string text)
{
    return string.IsNullOrEmpty(text)
        ? 0f
        : float.Parse(text, System.Globalization.CultureInfo.InvariantCulture);
}
/// <summary>
/// Parses a station timetable page into a StationDTO. Data rows (those whose
/// bgcolor is "#f5f8fa") are added to departs until the first row with any
/// other colour is met; data rows after that go to arrives.
/// </summary>
/// <param name="html">Raw HTML of the station page.</param>
/// <returns>The station with its departs and arrives lists filled.</returns>
private static StationDTO ParseStationHTML(string html)
{
    StationDTO station = new StationDTO();
    station.arrives = new List<StationEntryDTO>();
    station.departs = new List<StationEntryDTO>();

    HtmlDocument htmlDocument = new HtmlAgilityPack.HtmlDocument();
    htmlDocument.LoadHtml(html);

    // NOTE(review): the table/row lookups below still assume the page has the expected layout.
    HtmlNode infoTable = htmlDocument.DocumentNode.SelectSingleNode("html/body/table/tr[2]/td/table");
    HtmlNodeCollection rows = infoTable.SelectNodes("tr");

    // The first two rows are skipped.
    rows.RemoveAt(0);
    rows.RemoveAt(0);

    bool departing = true;
    foreach (var row in rows)
    {
        // GetAttributeValue avoids the NullReferenceException the indexer caused for
        // rows without a bgcolor attribute; such rows are treated like any other
        // non-data row. The comparison is ordinal and case-insensitive.
        string rowColor = row.GetAttributeValue("bgcolor", string.Empty);
        if (!string.Equals(rowColor, "#f5f8fa", System.StringComparison.OrdinalIgnoreCase))
        {
            departing = false;
            continue;
        }

        HtmlNodeCollection columns = row.SelectNodes("td");
        if (columns == null || columns.Count < 3)
        {
            // Not a complete station/train/time row.
            continue;
        }

        StationEntryDTO entry = new StationEntryDTO();
        entry.station = columns[0].InnerText.Trim();
        entry.train = columns[1].InnerText.Trim();
        entry.time = columns[2].InnerText.Trim();

        if (departing)
        {
            station.departs.Add(entry);
        }
        else
        {
            station.arrives.Add(entry);
        }
    }

    return station;
}
/// <summary>
/// Fetches the group table (retrying once a minute, with a beep, while the
/// request yields nothing) and shows a balloon tip for every matching block
/// whose "taken/available" counter still has free places.
/// </summary>
void parser()
{
    string html = request();
    while (string.IsNullOrEmpty(html))
    {
        // Play the error sound, wait a minute, try again.
        System.Media.SystemSounds.Beep.Play();
        Thread.Sleep(1000 * 60);
        html = request();
    }

    // Parse the page.
    var document = new HtmlAgilityPack.HtmlDocument();
    document.LoadHtml(html);

    // SelectNodes yields null when nothing matches.
    HtmlNodeCollection collection = document.DocumentNode.SelectNodes("//tr");
    if (collection == null || collection.Count == 0)
    {
        return;
    }

    // The first row is skipped.
    collection.RemoveAt(0);

    foreach (HtmlNode rows in collection)
    {
        var cells = rows.SelectNodes("th|td");
        if (cells == null)
        {
            continue;
        }

        // Drop blank cells so the remaining ones line up in groups of three.
        for (int i = cells.Count - 1; i >= 0; i--)
        {
            if (cells[i].InnerHtml.Trim().Length == 0)
            {
                cells.RemoveAt(i);
            }
        }

        // Each triple is (group number, block, people). An incomplete trailing
        // triple is ignored instead of indexing past the end.
        for (int i = 0; i + 2 < cells.Count; i += 3)
        {
            string block = cells[i + 1].InnerText;
            string people = cells[i + 2].InnerHtml;

            bool wantedBlock = (block.Contains("Mo. 1. Block") || block.Contains("Mo. 5. Block")) && block.Contains("kyb");
            if (!wantedBlock)
            {
                continue;
            }

            // Expected form "an/ver"; anything else is skipped instead of throwing.
            var anVer = people.Split('/');
            int an;
            int ver;
            if (anVer.Length < 2 || !int.TryParse(anVer[0], out an) || !int.TryParse(anVer[1], out ver))
            {
                continue;
            }

            if (an < ver)
            {
                notifyIcon1.BalloonTipText = block + " " + an + "/" + ver + " Plätze";
                notifyIcon1.BalloonTipTitle = "Gruppe frei!";
                notifyIcon1.ShowBalloonTip(19000);
            }
        }
    }
}
/// <summary>
/// Reads the course table out of course_result and stores every course that is
/// not yet recorded for the current student (stuid).
/// </summary>
public void ProcessCourse()
{
    var htmlDoc = new HtmlDocument();
    htmlDoc.LoadHtml(course_result);
    HtmlNode table = htmlDoc.DocumentNode.SelectSingleNode("//table[@class='table listTable']");

    // Collect the direct tr children of the table.
    HtmlNodeCollection courseRows = new HtmlNodeCollection(table);
    foreach (var child in table.ChildNodes)
    {
        if (child.Name != "tr")
        {
            continue;
        }

        courseRows.Add(child);
    }

    // The first tr is not a course entry, so it is removed.
    courseRows.RemoveAt(0);

    for (int rowIndex = 0; rowIndex < courseRows.Count; rowIndex++)
    {
        // All child nodes of the row are taken, not only td elements
        // (the original comment says each row holds 27 of them).
        HtmlNodeCollection cellNodes = courseRows[rowIndex].ChildNodes;

        var itemlist = new List<String>();
        for (int cellIndex = 0; cellIndex < cellNodes.Count; cellIndex++)
        {
            string raw = cellNodes[cellIndex].InnerText;
            itemlist.Add(raw.Replace("\r", "").Replace("\n", "").Replace("\t", "").Replace(" ", ""));
        }

        using (var context = new jwContext())
        {
            // Copied to a local because using itemlist[1] directly inside the
            // LINQ expression raises an error.
            var thisLessonNum = itemlist[1];
            var existing = context.Courses.SingleOrDefault(c => c.StuID == stuid && c.LessonNum == thisLessonNum);
            if (existing != null)
            {
                // Already recorded for this student.
                continue;
            }

            var newcourse = new Course
            {
                StuID = stuid,
                LessonNum = itemlist[1],
                LessonName = itemlist[3],
                LessonType = itemlist[5],
                LearninType = itemlist[7],
                TeachingCollege = itemlist[9],
                Teacher = itemlist[11],
                Specialty = itemlist[13],
                Credit = itemlist[15],
                LessonHours = itemlist[17],
                Time = itemlist[19],
                Note = itemlist[21]
            };
            context.Courses.Add(newcourse);
            context.SaveChanges();
        }
    }
}
/// <summary>
/// Extracts the ticket prices from a price response page. The third body-level
/// table supplies the individual fares, the fourth one the group fares; in both
/// the first row is skipped and columns 1 and 2 hold second and first class.
/// </summary>
/// <param name="priceResponse">Raw HTML of the price response.</param>
/// <returns>A PriceDTO filled with the trimmed cell texts.</returns>
private static PriceDTO ParsePriceResponse(string priceResponse)
{
    const int secondClass = 1;
    const int firstClass = 2;

    HtmlDocument htmlDocument = new HtmlAgilityPack.HtmlDocument();
    htmlDocument.LoadHtml(priceResponse);
    HtmlNodeCollection tables = htmlDocument.DocumentNode.SelectNodes("html/body/table");

    PriceDTO price = new PriceDTO();

    // Individual fares.
    HtmlNodeCollection individualRows = tables[2].SelectNodes("tbody/tr");
    individualRows.RemoveAt(0);

    price.regularFeeSecondClass = ReadPriceCell(individualRows, 0, secondClass);
    price.regularFeeFirstClass = ReadPriceCell(individualRows, 0, firstClass);
    price.decreasedFeeSecondClass = ReadPriceCell(individualRows, 1, secondClass);
    price.decreasedFeeFirstClass = ReadPriceCell(individualRows, 1, firstClass);
    price.decreasedFeeWithFirstIncludedSecondClass = ReadPriceCell(individualRows, 2, secondClass);
    price.decreasedFeeWithFirstIncludedFirstClass = ReadPriceCell(individualRows, 2, firstClass);

    // With exactly four rows there is no relational fare and row 3 is the
    // both-ways fare; otherwise row 3 is relational and row 4 is both-ways.
    int bothWaysRow = 3;
    if (individualRows.Count != 4)
    {
        price.relationalSecondClass = ReadPriceCell(individualRows, 3, secondClass);
        price.relationalFirstClass = ReadPriceCell(individualRows, 3, firstClass);
        bothWaysRow = 4;
    }

    price.bothWaysSecondClass = ReadPriceCell(individualRows, bothWaysRow, secondClass);
    price.bothWaysFirstClass = ReadPriceCell(individualRows, bothWaysRow, firstClass);

    // Group fares.
    HtmlNodeCollection groupRows = tables[3].SelectNodes("tbody/tr");
    groupRows.RemoveAt(0);

    price.groupRegularSecondClass = ReadPriceCell(groupRows, 0, secondClass);
    price.groupRegularFirstClass = ReadPriceCell(groupRows, 0, firstClass);
    price.groupDecreasedSecondClass = ReadPriceCell(groupRows, 1, secondClass);
    price.groupDecreasedFirstClass = ReadPriceCell(groupRows, 1, firstClass);

    return price;
}

/// <summary>Returns the trimmed text of the td at <paramref name="cellIndex"/> in the given row.</summary>
private static string ReadPriceCell(HtmlNodeCollection rows, int rowIndex, int cellIndex)
{
    return rows[rowIndex].SelectNodes("td")[cellIndex].InnerText.Trim();
}
/// <summary>
/// Parses a train detail page into a TrainDTO: one stop per remaining table
/// row, plus the title of every option image found in the last row.
/// </summary>
/// <param name="html">Raw HTML of the train page.</param>
/// <returns>The train with its stops and options lists filled.</returns>
private static TrainDTO ParseTrainHTML(string html)
{
    HtmlDocument htmlDocument = new HtmlAgilityPack.HtmlDocument();
    htmlDocument.LoadHtml(html);

    // NOTE(review): the table/row lookups below still assume the page has the expected layout.
    HtmlNode infoTable = htmlDocument.DocumentNode.SelectSingleNode("html/body/table/tr[not (@align)]/td/table");
    HtmlNodeCollection infoRows = infoTable.SelectNodes("tr");

    // Drop the two leading rows and the second-to-last row, then take the last
    // row aside: it carries the option icons rather than a stop.
    infoRows.RemoveAt(0);
    infoRows.RemoveAt(0);
    infoRows.RemoveAt(infoRows.Count - 2);
    HtmlNode TrainOptions = infoRows[infoRows.Count - 1];
    infoRows.RemoveAt(infoRows.Count - 1);

    TrainDTO train = new TrainDTO();
    train.stops = new List<TrainSimpleStopDTO>();
    train.options = new List<string>();

    foreach (var stopRow in infoRows)
    {
        HtmlNodeCollection columns = stopRow.SelectNodes("td");
        if (columns == null || columns.Count < 3)
        {
            // Not a complete station/arrives/departs row.
            continue;
        }

        train.stops.Add(new TrainSimpleStopDTO()
        {
            station = columns[0].InnerText.Trim(),
            arrives = columns[1].InnerText.Trim(),
            departs = columns[2].InnerText.Trim()
        });
    }

    // A train without option icons has no img nodes: SelectNodes then yields
    // null, which used to crash the foreach. Every step of the chain is checked.
    HtmlNode optionsCell = TrainOptions.SelectSingleNode("td");
    HtmlNode optionsSpan = optionsCell == null ? null : optionsCell.SelectSingleNode("span");
    HtmlNodeCollection images = optionsSpan == null ? null : optionsSpan.SelectNodes("img");
    if (images != null)
    {
        foreach (var image in images)
        {
            var title = image.Attributes["title"];
            if (title != null)
            {
                train.options.Add(title.Value);
            }
        }
    }

    return train;
}
/// <summary>
/// Downloads the market page and returns the rows of the "octable" table whose
/// markup contains the base number or one of its six offsets.
/// </summary>
/// <remarks>
/// Sets the range fields as a side effect. The field names do not match the
/// offsets assigned here (for example baseNumberPlus50 receives base + 100 and
/// baseNumberPlus200 receives base - 100); the assignments are kept as they were.
/// </remarks>
/// <param name="marketURL">The market URL</param>
/// <param name="openMarketBaseNumber">The open market base number</param>
/// <returns>The matching rows; empty when the table is not found.</returns>
private HtmlNodeCollection DownloadMarketData(string marketURL, int openMarketBaseNumber)
{
    // Define the range
    baseNumber = Math.Round(Convert.ToDecimal(openMarketBaseNumber), 2);
    baseNumberPlus50 = baseNumber + 100;
    baseNumberPlus100 = baseNumber + 200;
    baseNumberPlus150 = baseNumber + 300;
    baseNumberPlus200 = baseNumber - 100;
    baseNumberMinus50 = baseNumber - 200;
    baseNumberMinus100 = baseNumber - 300;

    // Format the seven search values once instead of once per row.
    string[] rangeValues =
    {
        baseNumber.ToString(),
        baseNumberPlus50.ToString(),
        baseNumberPlus100.ToString(),
        baseNumberMinus50.ToString(),
        baseNumberMinus100.ToString(),
        baseNumberPlus150.ToString(),
        baseNumberPlus200.ToString()
    };

    HtmlNodeCollection workSetRows = new HtmlNodeCollection(null);

    // Grab all rows
    var htmlWeb = new HtmlWeb();
    HtmlAgilityPack.HtmlDocument htmlDocument = htmlWeb.Load(marketURL);
    HtmlNodeCollection tableRows = htmlDocument.DocumentNode.SelectNodes("//table[@id=\"octable\"]//tr");
    if (tableRows == null)
    {
        // SelectNodes yields null when nothing matches: no table, no rows.
        return workSetRows;
    }

    // Drop the last row and the first two rows.
    tableRows.RemoveAt(tableRows.Count - 1);
    tableRows.RemoveAt(0);
    tableRows.RemoveAt(0);

    // Get only those rows which contain values for the defined range
    foreach (var currentTableRow in tableRows)
    {
        // InnerHtml is read once per row rather than once per comparison.
        string rowHtml = currentTableRow.InnerHtml;
        foreach (string value in rangeValues)
        {
            if (rowHtml.Contains(value))
            {
                workSetRows.Add(currentTableRow);
                break;
            }
        }
    }

    return workSetRows;
}
/// <summary>
/// Crawls the novel page entered in txtWebSite: reads name and cover from the
/// "fmimg" element, saves the catalog entry, collects the chapter nodes and
/// starts the background worker behind a modal progress dialog.
/// Shows a message box and stops when the novel name already exists.
/// </summary>
private void CrawJpxsWebSite()
{
    string htmlContent = GetContent(txtWebSite.Text);
    HtmlDocument doc = new HtmlDocument();
    doc.LoadHtml(htmlContent);

    Catalog log = new Catalog();
    HtmlNode navFM = doc.GetElementbyId("fmimg");
    string ImgUrl = navFM.ChildNodes[0].Attributes["src"].Value;
    log.NoteName = navFM.ChildNodes[0].Attributes["alt"].Value.UrlDecode();

    if (CheckIfExitNovelName(log.NoteName))
    {
        MessageBox.Show("数据库已存在该小说");
        return;
    }

    log.Img = GetImage(JpxsAddress + ImgUrl);
    log.CreateTime = DateTime.Now;
    log.Url = txtWebSite.Text;

    // Chapter entries; the first and the last dd are dropped.
    col = doc.DocumentNode.SelectNodes("//dd");
    col.RemoveAt(0);
    col.RemoveAt(col.Count - 1);

    CataId = SaveCatalog(log);
    txtNovel.Text = log.NoteName;
    ProgressCount = col.Count;

    // Unsubscribe before subscribing: this method used to add the handlers again
    // on every call, so a second crawl ran DoWork/RunWorkerCompleted twice, a
    // third one three times, and so on. Removing a handler that is not attached
    // is a no-op.
    this.backgroundWorker1.DoWork -= backgroundWorker1_DoWork;
    this.backgroundWorker1.DoWork += backgroundWorker1_DoWork;
    this.backgroundWorker1.RunWorkerCompleted -= backgroundWorker1_RunWorkerCompleted;
    this.backgroundWorker1.RunWorkerCompleted += backgroundWorker1_RunWorkerCompleted;

    // Start the background worker.
    this.backgroundWorker1.RunWorkerAsync();

    // Show the progress dialog.
    ProgressForm form = new ProgressForm(this.backgroundWorker1);
    form.ShowDialog(this);
}
/// <summary>
/// Splits <paramref name="ids"/> into groups via seperateSearch, runs one search
/// per group and passes each non-null result (minus its first node) to
/// extractNodes.
/// </summary>
/// <param name="ids">Ids to split and search for.</param>
/// <param name="limit">Limit forwarded to extractNodes for every group.</param>
private void extract_seperateNodes(List<String> ids, int limit)
{
    foreach (List<String> idGroup in seperateSearch(ids))
    {
        HtmlNodeCollection found = getSearchNode(idGroup);
        if (found != null)
        {
            // The first node is dropped before extraction.
            found.RemoveAt(0);
            extractNodes(found, limit);
        }
    }
}
/// <summary>
/// Posts an ISIN search for <paramref name="companyName"/> to queryURL and
/// returns the result rows of the second table on the response page.
/// </summary>
/// <param name="companyName">Company name to search for; sent euc-kr URL-encoded.</param>
/// <param name="onlyEquity">Selects security scope "01" when true, "99" otherwise.</param>
/// <param name="onlyNonListing">
/// Restricts the listing scope to "lst_yn2=N"; only honoured together with
/// <paramref name="onlyEquity"/>.
/// </param>
/// <returns>
/// The rows after the first one, or null when the page is empty, has fewer than
/// two tables, or the table holds no rows beyond the first.
/// </returns>
public static HtmlNodeCollection SearchISIN(string companyName, bool onlyEquity, bool onlyNonListing)
{
    string securityScope = onlyEquity ? "01" : "99";
    onlyNonListing = onlyEquity && onlyNonListing; // no choice for all security scope
    string listScope = onlyNonListing ? "lst_yn2=N" : "lst_yn_all=on&lst_yn1=Y&lst_yn2=N&lst_yn3=D";
    companyName = HttpUtility.UrlEncode(companyName, Encoding.GetEncoding("euc-kr"));

    string postData = string.Format("kind=&ef_std_cd_grnt_dt_from=&ef_std_cd_grnt_dt_to=&secuGubun={0}"
                                    + "&{1}&els_dls_all=on&els_dls1=els&els_dls2=dls&so_gb_all=on&so_gb1=s&so_gb2=o&jp_gb_all=on"
                                    + "&jp_gb1=c&jp_gb2=t&jp_gb3=r&jp_gb4=i&hg_gb_all=on&hg_gb1=h&hg_gb2=g&tg_gb_all=on&tg_gb1=x&tg_gb2=z&df_gb_all=on&df_gb1=df1"
                                    + "&df_gb2=df2&df_gb3=df3&df_gb4=df4&df_gb5=df5&df_gb6=df6&df_gb7=df7&cb_search_column=co_nm&ef_key_word={2}"
                                    + "&ef_iss_inst_cd=&ef_isu_nm=&ef_iss_dt_from=&ef_iss_dt_to=&ef_lst_dt_from=&ef_lst_dt_to=",
                                    securityScope,
                                    listScope,
                                    companyName);

    AdvancedWebClient wc = new AdvancedWebClient();
    string pageSource = WebClientUtil.GetPageSource(wc, queryURL, 300000, postData, Encoding.GetEncoding("euc-kr"));
    if (string.IsNullOrEmpty(pageSource))
    {
        return null;
    }

    HtmlDocument doc = new HtmlDocument();
    doc.LoadHtml(pageSource);

    // Query the tables once. SelectNodes yields null (not an empty collection)
    // when nothing matches, so the result is checked before Count is read.
    HtmlNodeCollection tables = doc.DocumentNode.SelectNodes("//table");
    if (tables == null || tables.Count < 2)
    {
        return null;
    }

    HtmlNodeCollection records = tables[1].SelectNodes(".//tr");
    if (records == null || records.Count == 1)
    {
        // No rows at all, or only the first row: nothing was found.
        return null;
    }

    records.RemoveAt(0);
    return records;
}
/// <summary>
/// Downloads the public shelter list and, when it has at least one data row,
/// replaces the stored shelters with the freshly parsed ones. Always redirects
/// to the Index action.
/// </summary>
/// <returns>A redirect to "Index".</returns>
public async Task<ActionResult> Synchronize()
{
    var url = "https://211colorado.communityos.org/z_eda/publicshelterassist.taf?function=list";
    var getHtmlWeb = new HtmlWeb();
    var document = await getHtmlWeb.LoadFromWebAsync(url);

    HtmlNodeCollection nodes = document.DocumentNode.SelectNodes("//table[3]/tr/td/table/tr");
    if (nodes == null || nodes.Count <= 1)
    {
        // Nothing but (at most) the header row: leave the stored data untouched.
        return RedirectToAction("Index");
    }

    // The first row is the header.
    nodes.RemoveAt(0);

    var shelters = new List<Shelter>();
    foreach (var node in nodes)
    {
        List<string> columnValues = node.SelectNodes("td")
            .Select(td => td.InnerText.Replace(" ", " ").Trim())
            .ToList();

        var shelter = new Shelter();
        shelter.Name = columnValues[0];
        shelter.CurrentPopulation = intParseDefault(columnValues[4], 0);
        shelter.SingleSpacesAvailable = intParseDefault(columnValues[5], 0);
        shelter.FamilySpacesAvailable = intParseDefault(columnValues[6], 0);
        shelter.CurrentTotalSpaces = intParseDefault(columnValues[7], 0);
        shelter.ShelterID = columnValues[12];
        shelter.AvailabilityLastUpdated = DateTime.UtcNow;
        shelters.Add(shelter);
    }

    // Clear the table first, then insert the new snapshot (two separate saves).
    _dbContext.Shelters.RemoveRange(_dbContext.Shelters);
    _dbContext.SaveChanges();

    _dbContext.Shelters.AddRange(shelters);
    _dbContext.SaveChanges();

    return RedirectToAction("Index");
}
/// <summary>
/// Searches the KRX ISIN site for <paramref name="companyName"/> and returns the
/// result rows of the third table on the response page.
/// </summary>
/// <param name="companyName">Company name inserted into the post-data template.</param>
/// <param name="onlyEquity">
/// Selects the equity template (EquityISINPostData.txt) when true, the KDR
/// template (KDRISINPostData.txt) otherwise.
/// </param>
/// <returns>
/// The rows after the first one, or null when the template file is missing (a
/// message box is shown), the page is empty, it has fewer than three tables, or
/// the table holds no rows beyond the first.
/// </returns>
public static HtmlNodeCollection SearchISIN(string companyName, bool onlyEquity)
{
    string uri = @"http://isin.krx.co.kr/srch/srch.do?method=srchList";

    // Initial GET with the shared cookie container; the body is read and discarded.
    // NOTE(review): presumably this request exists to obtain session cookies — confirm.
    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(uri);
    request.Timeout = 300000;
    request.Method = "GET";
    request.CookieContainer = cookies;
    request.Host = @"isin.krx.co.kr";
    request.Accept = @"text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8";
    request.Headers["Accept-Encoding"] = @"gzip,deflate,sdch";
    request.Headers["Accept-Language"] = @"en-US,en;q=0.8,zh-CN;q=0.6,zh;q=0.4";
    request.KeepAlive = true;
    request.UserAgent = @"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.57 Safari/537.36";

    // The response and its reader were never disposed before, leaking the connection.
    using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
    using (StreamReader sr = new StreamReader(response.GetResponseStream()))
    {
        sr.ReadToEnd();
    }

    string postDataPath = onlyEquity
        ? @"Config\Korea\EquityISINPostData.txt"
        : @"Config\Korea\KDRISINPostData.txt";
    if (!File.Exists(postDataPath))
    {
        System.Windows.Forms.MessageBox.Show(string.Format("The file {0} missing.", postDataPath));
        return null;
    }

    string postData = string.Format(File.ReadAllText(postDataPath, Encoding.UTF8), companyName, companyName);
    string pageSource = GetPageSource(uri, postData);
    if (string.IsNullOrEmpty(pageSource))
    {
        return null;
    }

    HtmlDocument doc = new HtmlDocument();
    doc.LoadHtml(pageSource);

    // The table at index 2 is read, so at least three tables are required; the
    // old "Count < 2" check let a two-table page through and indexing then threw.
    // SelectNodes yields null when nothing matches, so that is checked as well.
    HtmlNodeCollection tables = doc.DocumentNode.SelectNodes("//table");
    if (tables == null || tables.Count < 3)
    {
        return null;
    }

    HtmlNodeCollection records = tables[2].SelectNodes(".//tr");
    if (records == null || records.Count == 1)
    {
        // No rows at all, or only the first row: nothing was found.
        return null;
    }

    records.RemoveAt(0);
    return records;
}
/// <summary>
/// Moves every node from <paramref name="index"/> to the end of
/// <paramref name="nodes"/> under <paramref name="newParent"/>, removes them
/// from the original collection and marks the new parent as explicitly terminated.
/// </summary>
private static void MoveNodesDown(ref HtmlNodeCollection nodes, int index, HtmlElement newParent)
{
    int originalCount = nodes.Count;

    // Phase 1: attach the tail nodes to the new parent.
    int position = index;
    while (position < originalCount)
    {
        newParent.Nodes.Add(nodes[position]);
        nodes[position].SetParent(newParent);
        position++;
    }

    // Phase 2: remove the same number of nodes; each removal shifts the next
    // one into the slot at index.
    int pendingRemovals = originalCount - index;
    while (pendingRemovals > 0)
    {
        nodes.RemoveAt(index);
        pendingRemovals--;
    }

    newParent.IsExplicitlyTerminated = true;
}
/// <summary>
/// Reads the score table out of score_result and stores every score that has a
/// mark and is not yet recorded for the current student (stuid).
/// </summary>
public void ProcessScore()
{
    var htmlDoc = new HtmlDocument();
    htmlDoc.LoadHtml(score_result);
    HtmlNode table = htmlDoc.DocumentNode.SelectSingleNode("//table[@class='table listTable']");

    // Collect the direct tr children of the table.
    HtmlNodeCollection scoreRows = new HtmlNodeCollection(table);
    foreach (var child in table.ChildNodes)
    {
        if (child.Name != "tr")
        {
            continue;
        }

        scoreRows.Add(child);
    }

    // The first tr is not a course entry, so it is removed.
    scoreRows.RemoveAt(0);

    for (int rowIndex = 0; rowIndex < scoreRows.Count; rowIndex++)
    {
        HtmlNode row = scoreRows[rowIndex];

        // Keep only the td children of the row.
        HtmlNodeCollection cells = new HtmlNodeCollection(row);
        foreach (var child in row.ChildNodes)
        {
            if (child.Name != "td")
            {
                continue;
            }

            cells.Add(child);
        }

        var itemlist = new List<String>();
        for (int cellIndex = 0; cellIndex < cells.Count; cellIndex++)
        {
            string raw = cells[cellIndex].InnerText;
            itemlist.Add(raw.Replace("\r", "").Replace("\n", "").Replace("\t", "").Replace(" ", ""));
        }

        using (var context = new jwContext())
        {
            // Copied to a local because using the list element directly inside
            // the LINQ expression raises an error.
            var thisLessonName = itemlist[0];
            var existing = context.Scores.SingleOrDefault(s => s.StuID == stuid && s.LessonName == thisLessonName);

            // Skip rows already recorded, and rows whose mark is still empty
            // (only courses with a released mark are stored).
            if (existing != null || itemlist[10] == "")
            {
                continue;
            }

            var newscore = new Score
            {
                StuID = stuid,
                LessonName = itemlist[0],
                LessonType = itemlist[1],
                GeneralLessonType = itemlist[2],
                LessonAttribute = itemlist[3],
                Credit = itemlist[4],
                TeacherName = itemlist[5],
                TeachingCollege = itemlist[6],
                LearningType = itemlist[7],
                Year = itemlist[8],
                Term = itemlist[9],
                Mark = itemlist[10],
            };
            context.Scores.Add(newscore);
            context.SaveChanges();
        }
    }
}
/// <summary>
/// Scrapes one Barnes &amp; Noble result page for <paramref name="bookTitle"/>,
/// appends the matching entries to dataList, recurses into the next page while
/// a "next" pagination element exists, and writes dataList to the data file.
/// </summary>
/// <param name="bookTitle">Title to search for; entries must start with it after normalisation.</param>
/// <param name="bookType">Book type passed to GetUrl; 'N' additionally filters out NOOK formats.</param>
/// <param name="currPageNum">Result page number to load.</param>
/// <returns>The shared dataList with every entry collected so far.</returns>
public static List<string[]> GetBarnesAndNobleData(string bookTitle, char bookType, byte currPageNum)
{
    // Initialize the html doc for crawling
    HtmlDocument doc = new HtmlDocument();

    EdgeOptions edgeOptions = new EdgeOptions();
    edgeOptions.UseChromium = true;
    edgeOptions.PageLoadStrategy = PageLoadStrategy.Eager;
    edgeOptions.AddArgument("headless");
    edgeOptions.AddArgument("disable-gpu");
    edgeOptions.AddArgument("disable-extensions");
    edgeOptions.AddArgument("inprivate");

    // The browser is only needed to obtain the page source. It is quit right
    // after that, even on failure: previously only the driver of the last page
    // was quit, so every earlier page of the recursion leaked a browser.
    EdgeDriver edgeDriver = new EdgeDriver(edgeOptions);
    try
    {
        edgeDriver.Navigate().GoToUrl(GetUrl(bookTitle, currPageNum, bookType));
        Thread.Sleep(2000);
        doc.LoadHtml(edgeDriver.PageSource);
    }
    finally
    {
        edgeDriver.Quit();
    }

    HtmlNodeCollection titleData = doc.DocumentNode.SelectNodes("//a[@class=' ']");
    Console.WriteLine();
    HtmlNodeCollection priceData = doc.DocumentNode.SelectNodes("//a[@class=' link']//span[last()]");
    HtmlNodeCollection stockStatusData = doc.DocumentNode.SelectNodes("//div[1][@class='availability-spacing flex']//p");
    HtmlNode pageCheck = doc.DocumentNode.SelectSingleNode("//li[@class='pagination__next ']");

    if (bookType == 'N')
    {
        // Drop the entries whose format mentions NOOK. SelectNodes yields null
        // when nothing matches, so both collections are checked first.
        HtmlNodeCollection formatTypeData = doc.DocumentNode.SelectNodes("//span[@class='format']");
        if (formatTypeData != null && titleData != null)
        {
            for (int x = 0; x < formatTypeData.Count; x++)
            {
                if (formatTypeData[x].InnerText.IndexOf("NOOK") != -1)
                {
                    titleData.RemoveAt(x);
                    formatTypeData.RemoveAt(x);
                    x--; // stay on the same index: the next entry moved into it
                }
            }
        }
    }

    try
    {
        string stockStatus, currTitle;
        Regex removeExtra = new Regex(@"[^a-z']");

        // The normalised search title does not change per entry; compute it once.
        string wantedTitle = removeExtra.Replace(bookTitle.ToLower(), "");

        for (int x = 0; x < titleData.Count; x++)
        {
            currTitle = titleData[x].InnerText;
            if (removeExtra.Replace(currTitle.ToLower(), "").IndexOf(wantedTitle) == 0)
            {
                stockStatus = stockStatusData[x].InnerText;
                if (stockStatus.IndexOf("Available Online") != -1)
                {
                    stockStatus = "IS";
                }
                else if (stockStatus.IndexOf("Out of Stock Online") != -1)
                {
                    stockStatus = "OOS";
                }
                else if (stockStatus.IndexOf("Pre-order Now") != -1)
                {
                    stockStatus = "PO";
                }

                dataList.Add(new string[] { currTitle, priceData[x].InnerText.Trim(), stockStatus, "Barnes & Noble" });
            }
        }

        if (pageCheck != null)
        {
            // More pages: continue with the next one.
            currPageNum++;
            GetBarnesAndNobleData(bookTitle, bookType, currPageNum);
        }
        else
        {
            foreach (string link in links)
            {
                Console.WriteLine(link);
            }
        }
    }
    catch (NullReferenceException ex)
    {
        // A missing node collection ends the whole process, as before.
        Console.Error.WriteLine(ex);
        Environment.Exit(1);
    }

    using (StreamWriter outputFile = new StreamWriter(@"C:\MangaWebScrape\MangaWebScrape\Data_Files\BarnesAndNobleData.txt"))
    {
        foreach (string[] data in dataList)
        {
            outputFile.WriteLine(data[0] + " " + data[1] + " " + data[2] + " " + data[3]);
        }
    }

    return dataList;
}
/// <summary>
/// Downloads the hourly price page for <paramref name="day"/> and writes one XML
/// file per zone tab (tab_fr, tab_de, tab_ch) containing the value of that day
/// for every hour row.
/// </summary>
/// <param name="day">Day to extract; it is looked up among the dates in the table header.</param>
/// <remarks>
/// Failures do not propagate: the exception is written to the trace listeners
/// and the method returns.
/// </remarks>
public void Run(DateTime day)
{
    bool is25hours = (day.Month == 10 && isLastSunday(day));
    string URL = _baseURL + day.ToString("yyyy-MM-dd") + "/FR";

    try
    {
        _htmlDoc.LoadHtml(_webClient.DownloadString(URL));

        // Collect the dates shown in the header row.
        HtmlNode dateRow = _htmlDoc.DocumentNode.SelectSingleNode("//div[@id='tab_fr']//table[@class='list hours responsive']//tr");
        HtmlNodeCollection headerCells = dateRow == null ? null : dateRow.SelectNodes("th");
        if (headerCells == null)
        {
            // Page without the expected header: nothing can be extracted.
            return;
        }

        CultureInfo headerCulture = new CultureInfo("en-US");
        List<DateTime> days = new List<DateTime>();
        foreach (HtmlNode col in headerCells)
        {
            DateTime d;
            if (DateTime.TryParseExact(col.InnerText + " " + day.Year, "ddd, dd/MM yyyy", headerCulture, DateTimeStyles.None, out d))
            {
                days.Add(d);
            }
        }

        // Column of the requested day; the same for every tab and row.
        // NOTE(review): when day is not among the header dates this is -1 and td[2] is read — confirm intent.
        int index = days.IndexOf(day);

        KeyValuePair<string, int>[] tabIDs = new KeyValuePair<string, int>[]
        {
            new KeyValuePair<string, int>("tab_fr", 987),
            new KeyValuePair<string, int>("tab_de", 924),
            new KeyValuePair<string, int>("tab_ch", 988)
        };

        foreach (KeyValuePair<string, int> tabID in tabIDs)
        {
            HtmlNodeCollection tab = _htmlDoc.DocumentNode.SelectNodes("//div[@id='" + tabID.Key + "']//table[@class='list hours responsive']//tr[@class='no-border']");
            if (tab == null)
            {
                // This zone has no rows; previously the resulting exception was
                // swallowed and silently skipped all remaining zones as well.
                continue;
            }

            // The requested day has 24 hours but the table also carries the row of the 25th hour.
            if (!is25hours && tab.Count == 25)
            {
                tab.RemoveAt(3);
            }

            DataTable dt = initTable();
            int i = 0;
            foreach (HtmlNode row in tab)
            {
                // Pick the cell of interest: index is 0-based and the first two
                // columns are headers.
                HtmlNode mgpVal = row.SelectSingleNode("td[" + (3 + index) + "]");

                i++;
                DataRow newRow = dt.NewRow();
                newRow["Zona"] = tabID.Value;
                newRow["Data"] = day.ToString("yyyyMMdd") + (i < 10 ? "0" : "") + i;
                newRow["Mgp"] = 0;

                // A missing or unparsable cell keeps the default 0.
                // NOTE(review): the '.' -> ',' replacement assumes a current culture with a comma decimal separator — confirm.
                decimal tmp;
                if (mgpVal != null && Decimal.TryParse(mgpVal.InnerText.Replace('.', ','), out tmp))
                {
                    newRow["MGP"] = tmp;
                }

                dt.Rows.Add(newRow);
            }

            if (dt.Rows.Count > 0)
            {
                // Write the table into the output folder.
                string path = Path.Combine(_basePath, day.ToString("yyyyMMdd") + "_" + tabID.Value + ".xml");
                dt.WriteXml(path);
            }
        }
    }
    catch (Exception ex)
    {
        // Still best-effort, but no longer invisible.
        System.Diagnostics.Trace.TraceError("Run({0:yyyy-MM-dd}) failed: {1}", day, ex);
    }
}
/// <summary>
/// Looks up novels by the current keyword (_str_KeyWord) on the search page.
/// </summary>
/// <returns>
/// One tb_fiction_info per result row, or null when the keyword is empty, the
/// page is empty, only the header row is present, or any error occurs.
/// </returns>
public List<tb_fiction_info> _o_Get_Fiction_Info_By_KeyWord()
{
    // No keyword, no search.
    if (_str_KeyWord == "")
    {
        return null;
    }

    List<tb_fiction_info> _ltfi_ret = new List<tb_fiction_info>();
    HtmlWeb _web_Main = new HtmlWeb();
    _web_Main.OverrideEncoding = Encoding.UTF8;

    try
    {
        HtmlAgilityPack.HtmlDocument _doc_Main = _web_Main.Load(_url_Search + _str_KeyWord);

        // Nothing came back.
        if (_doc_Main.Text == "")
        {
            return null;
        }

        // Result list; its first item is the header, so real results need more than one item.
        HtmlNodeCollection _hnc_Rows = _doc_Main.DocumentNode.SelectNodes("//div[starts-with(@class,'search-list')]/ul/li");
        if (_hnc_Rows.Count == 1)
        {
            return null;
        }

        // Drop the header.
        _hnc_Rows.RemoveAt(0);

        foreach (HtmlNode _hn_Row in _hnc_Rows)
        {
            // Each row is re-parsed as its own document so the "//" queries stay inside it.
            HtmlAgilityPack.HtmlDocument _doc_Row = new HtmlAgilityPack.HtmlDocument();
            _doc_Row.LoadHtml(_hn_Row.InnerHtml);

            tb_fiction_info _tfi = new tb_fiction_info();

            // Novel type (brackets stripped).
            HtmlNode _hn_Type = _hn_First_Match(_doc_Row, "//span[starts-with(@class,'s1')]");
            if (_hn_Type != null)
            {
                _tfi.col_fiction_type = _hn_Type.InnerText.Replace("[", "").Replace("]", "");
            }

            // Novel name and home page link.
            HtmlNode _hn_Name = _hn_First_Match(_doc_Row, "//span[starts-with(@class,'s2')]/a");
            if (_hn_Name != null)
            {
                _tfi.col_fiction_name = _hn_Name.InnerText.Trim();
                _tfi.col_url_homepage = _hn_Name.Attributes["href"].Value;
            }

            // Latest chapter and its link.
            HtmlNode _hn_Chapter = _hn_First_Match(_doc_Row, "//span[starts-with(@class,'s3')]/a");
            if (_hn_Chapter != null)
            {
                _tfi.col_update_chapter = _hn_Chapter.InnerText;
                _tfi.col_update_chapter_url = _hn_Chapter.Attributes["href"].Value;
            }

            // Author.
            HtmlNode _hn_Author = _hn_First_Match(_doc_Row, "//span[starts-with(@class,'s4')]");
            if (_hn_Author != null)
            {
                _tfi.col_fiction_author = _hn_Author.InnerText;
            }

            // Click count.
            HtmlNode _hn_Clicks = _hn_First_Match(_doc_Row, "//span[starts-with(@class,'s5')]");
            if (_hn_Clicks != null)
            {
                _tfi.col_click_count = _hn_Clicks.InnerText;
            }

            // Update time.
            HtmlNode _hn_Updated = _hn_First_Match(_doc_Row, "//span[starts-with(@class,'s6')]");
            if (_hn_Updated != null)
            {
                _tfi.col_update_time = DateTime.Parse(_hn_Updated.InnerText);
            }

            // Novel status.
            HtmlNode _hn_Status = _hn_First_Match(_doc_Row, "//span[starts-with(@class,'s7')]");
            if (_hn_Status != null)
            {
                _tfi.col_fiction_stata = _hn_Status.InnerText;
            }

            _tfi.col_fiction_source = "笔趣阁";
            _ltfi_ret.Add(_tfi);
        }

        return _ltfi_ret;
    }
    catch
    {
        // Any failure is reported as "no result".
        return null;
    }
}

/// <summary>Returns the first node matching the XPath in the document, or null when there is none.</summary>
private static HtmlNode _hn_First_Match(HtmlAgilityPack.HtmlDocument _doc, string _str_XPath)
{
    HtmlNodeCollection _hnc_Found = _doc.DocumentNode.SelectNodes(_str_XPath);
    if (_hnc_Found != null && _hnc_Found.Count > 0)
    {
        return _hnc_Found[0];
    }

    return null;
}
/// <summary>
/// Loads the chapter list page at <paramref name="address"/> and returns the
/// title and address of every chapter entry.
/// </summary>
/// <param name="address">URL of the chapter list page.</param>
/// <returns>
/// The chapter list (reversed when isReverse is set), or null when GetNode
/// rejects the address or an error occurs (the error is shown in a message box).
/// </returns>
public List<Info> GetChapList(string address)
{
    try
    {
        if (!GetNode(address))
        {
            return null;
        }

        HtmlWeb web = WebConfig(isEncodeGB2312);
        var doc = web.Load(address);
        HtmlNodeCollection chapList = doc.DocumentNode.SelectNodes(chapListNode);

        if (startNode > 0 || endNode > 0)
        {
            chapList = RemoveSurplusChap(chapList, startNode, endNode);
        }

        if (isRightToLeft)
        {
            // Entries come in reversed groups of three (3, 2, 1, 6, 5, 4, ...);
            // swapping the outer two of each group restores (1, 2, 3, 4, 5, 6, ...).
            for (int i = 0; i < chapList.Count; i += 3)
            {
                HtmlNode temp = chapList[i];
                chapList[i] = chapList[i + 2];
                chapList[i + 2] = temp;
            }

            // Remove placeholder entries, walking backwards so removals do not
            // shift the indices still to be visited. This replaces a Parallel.For
            // that added to a List<int> from several threads at once; List<T> is
            // not thread-safe, so indices could be lost or the list corrupted.
            for (int i = chapList.Count - 1; i >= 0; i--)
            {
                if (chapList[i].InnerHtml == " ")
                {
                    chapList.RemoveAt(i);
                }
            }
        }

        List<Info> listInfo = new List<Info>();
        for (int i = 0; i < chapList.Count; i++)
        {
            Info info = new Info();
            if (isRightToLeft)
            {
                // In this layout the link is a child of the entry.
                HtmlNode link = chapList[i].SelectSingleNode("a");
                info.Title = link.InnerText;
                info.Address = chapAddressNode + link.Attributes["href"].Value;
            }
            else
            {
                // Here the entry itself carries the href.
                info.Title = chapList[i].InnerText;
                info.Address = chapAddressNode + chapList[i].Attributes["href"].Value;
            }

            listInfo.Add(info);
        }

        if (isReverse)
        {
            listInfo.Reverse();
        }

        return listInfo;
    }
    catch (Exception e)
    {
        MessageBox.Show(e.Message, "Lỗi", MessageBoxButtons.OK, MessageBoxIcon.Error);
        return null;
    }
}