/// <summary>
/// Sanitizer callback that rewrites inline-image and hyperlink nodes as they are visited.
/// </summary>
/// <remarks>
/// <c>img</c> nodes whose <c>src</c> starts with <c>cid:</c> are pointed at the file resolved for the
/// inline document in <c>source.Documents</c> with the same content id. <c>a</c> nodes that carry an
/// <c>href</c> get the href blanked and an <c>onclick</c> attribute written that forwards the original
/// URL to <c>window.external.JsNavigate</c>.
/// </remarks>
/// <param name="nodeName">Element name of the visited node.</param>
/// <param name="node">The visited node; its attributes are modified in place.</param>
/// <param name="writer">Writer that receives the extra <c>onclick</c> attribute for links.</param>
void SanitizerNodeVisited(string nodeName, HtmlNode node, XmlWriter writer)
{
    if (nodeName == "img"
        && node.Attributes["src"] != null
        && node.Attributes["src"].Value.StartsWith("cid:", StringComparison.Ordinal))
    {
        // Everything after the "cid:" prefix is the content id.
        var contentId = node.Attributes["src"].Value.Substring("cid:".Length);

        // Find the inline attachment with the given content id.
        var document = source.Documents.FirstOrDefault(
            d => d.ContentType == ContentType.Inline && d.ContentId == contentId);

        if (document != null)
        {
            // Replace the content-id url with the physical filename.
            var filename = ClientState.Current.Storage.ResolvePhysicalFilename(".", document.StreamName);
            node.Attributes["src"].Value = String.Format("file://{0}", filename);
        }
    }
    else if (nodeName == "a" && node.Attributes["href"] != null)
    {
        var url = node.Attributes["href"].Value ?? String.Empty;

        // Clean the href and inject the javascript hook instead.
        node.Attributes["href"].Value = String.Empty;

        // The URL comes from untrusted markup and ends up inside a single-quoted JavaScript
        // string literal, so backslashes and quotes must be escaped to keep it a literal.
        var escapedUrl = url.Replace("\\", "\\\\").Replace("'", "\\'");

        writer.WriteAttributeString(
            "onclick",
            String.Format("javascript:window.external.JsNavigate('{0}')", escapedUrl));
    }
}
/// <summary>
/// Passes every direct child of <paramref name="node"/> to <c>ConvertTo</c>, in collection order.
/// </summary>
/// <param name="node">Node whose children are converted.</param>
/// <param name="outText">Writer handed through to <c>ConvertTo</c> for each child.</param>
private static void ConvertContentTo(HtmlNode node, TextWriter outText)
{
    foreach (var child in node.ChildNodes)
    {
        ConvertTo(child, outText);
    }
}
/// <summary>
/// Downloads the page at <paramref name="url"/> and parses it into an HTML document.
/// </summary>
/// <param name="url">Address of the page; its <c>OriginalString</c> is what gets requested.</param>
public ItemCrawler(Uri url)
{
    _htmlDocument = new HtmlDocument();

    string html;
    // WebClient is IDisposable; release it as soon as the download has finished.
    using (var client = new WebClient())
    {
        html = client.DownloadString(url.OriginalString);
    }

    _htmlDocument.LoadHtml(html);
    _document = _htmlDocument.DocumentNode;
}
/// <summary>
/// Builds one <c>VideoInfo</c> per <c>article</c> element found below <paramref name="node"/>.
/// </summary>
/// <remarks>
/// The first anchor of each article supplies the URL (made absolute against
/// <c>http://www.oppetarkiv.se/</c> when relative) and the title; the thumbnail comes from
/// <c>.//noscript/img</c> and the air date from the first <c>time</c> element's <c>datetime</c> attribute.
/// Articles whose link cannot be turned into a URI are skipped.
/// </remarks>
/// <param name="node">Root node to search for articles.</param>
/// <returns>The videos found, in document order.</returns>
private List<VideoInfo> GetOppetArkivVideoList(HtmlAgilityPack.HtmlNode node)
{
    List<VideoInfo> videoList = new List<VideoInfo>();
    Uri baseUri = new Uri("http://www.oppetarkiv.se/");

    foreach (var article in node.Descendants("article"))
    {
        string href = article.Descendants("a")
            .Select(a => a.GetAttributeValue("href", ""))
            .FirstOrDefault();

        Uri resolved;
        if (!Uri.TryCreate(href, UriKind.Absolute, out resolved))
        {
            Uri.TryCreate(baseUri, href, out resolved);
        }

        // Both attempts can fail (for example when the article has no anchor at all);
        // dereferencing the result unconditionally would throw.
        if (resolved == null)
        {
            continue;
        }

        VideoInfo video = new VideoInfo();
        video.VideoUrl = resolved.ToString();
        if (string.IsNullOrEmpty(video.VideoUrl))
        {
            continue;
        }

        string rawTitle = article.Descendants("a")
            .Select(a => a.GetAttributeValue("title", ""))
            .FirstOrDefault() ?? "";
        video.Title = HttpUtility.HtmlDecode(rawTitle.Trim().Replace('\n', ' '));

        var thumbNode = article.SelectSingleNode(".//noscript/img");
        video.Thumb = thumbNode != null ? thumbNode.GetAttributeValue("src", "") : "";
        if (video.Thumb.StartsWith("//", StringComparison.Ordinal))
        {
            // Protocol-relative URL: pin it to http.
            video.Thumb = "http:" + video.Thumb;
        }

        video.Airdate = article.Descendants("time")
            .Select(t => t.GetAttributeValue("datetime", ""))
            .FirstOrDefault();

        // Keep the raw attribute text when it is not a parseable date instead of
        // aborting the whole listing.
        DateTime airdate;
        if (!string.IsNullOrEmpty(video.Airdate) && DateTime.TryParse(video.Airdate, out airdate))
        {
            video.Airdate = airdate.ToString("d", OnlineVideoSettings.Instance.Locale);
        }

        videoList.Add(video);
    }

    return videoList;
}
/// <summary>
/// Builds a <c>Schedule_W</c> for the given date from the fields read out of <paramref name="node"/>.
/// </summary>
/// <remarks>
/// Returns <c>null</c> when <c>GetInnerHtml(node, "none")</c> yields a value or when
/// <c>GetInnerHtml(node, "relay")</c> yields none. Exceptions from <c>GetInnerHtml</c> propagate
/// unchanged, keeping their original stack trace.
/// </remarks>
private Schedule_W CreateSchedule_WFromNode(HtmlNode node, Int32 year, Int32 month, Int32 day)
{
    if (GetInnerHtml(node, "none") != null || GetInnerHtml(node, "relay") == null)
    {
        return null;
    }

    return new Schedule_W
    {
        Year = year,
        Month = month,
        Day = day,
        Time = GetInnerHtml(node, "time"),
        Play = GetInnerHtml(node, "play"),
        Relay = GetInnerHtml(node, "relay"),
        BallPark = GetInnerHtml(node, "ballpark"),
        Etc = GetInnerHtml(node, "etc"),
    };
}
/// <summary>
/// Replaces <paramref name="el"/> in its parent with a node created from <paramref name="s"/>.
/// </summary>
/// <param name="el">Node to replace; it must have a parent.</param>
/// <param name="s">Markup or text handed to <c>HtmlNode.CreateNode</c>.</param>
/// <returns>The newly created replacement node.</returns>
public static HAP.HtmlNode ReplaceWithText(this HAP.HtmlNode el, string s)
{
    HAP.HtmlNode replacement = HAP.HtmlNode.CreateNode(s);
    HAP.HtmlNode parent = el.ParentNode;
    parent.ReplaceChild(replacement, el);
    return replacement;
}
/// <summary>
/// Adds a new package to <paramref name="app"/> and fills its title and prices from
/// <paramref name="packageNode"/>.
/// </summary>
/// <remarks>
/// When a regular price node is present, current and original price are both set from it;
/// otherwise the original and discounted price nodes are read separately.
/// All lookups use <c>.//</c> so they are scoped to <paramref name="packageNode"/>: an XPath
/// that starts with <c>//</c> is evaluated from the document root and would return the first
/// match in the whole page for every package.
/// </remarks>
private static void AddPackage(SteamApp app, HtmlNode packageNode)
{
    var package = app.AddNewPackage();

    var packageTitleNode = packageNode.SelectSingleNode($".//{PackageTitle}");
    package.Title = packageTitleNode.InnerHtml.Replace("Buy ", "").Trim();

    var priceNodes = packageNode.SelectNodes($".//div[@class='{PackagePriceXPath}']");
    if (priceNodes != null)
    {
        // Not discounted: one price serves as both current and original.
        var priceNode = priceNodes[0];
        package.CurrentPrice = ParseNodeWithCurrencyToDecimal(priceNode);
        package.OriginalPrice = package.CurrentPrice;
    }
    else
    {
        var originalPriceNode = packageNode.SelectSingleNode($".//div[@class='{PackageOriginalPriceXPath}']");
        package.OriginalPrice = ParseNodeWithCurrencyToDecimal(originalPriceNode);

        var discountPriceNode = packageNode.SelectSingleNode($".//div[@class='{PackageDiscountPriceXPath}']");
        package.CurrentPrice = ParseNodeWithCurrencyToDecimal(discountPriceNode);
    }
}
/// <summary>
/// Parses an inquiry mail and returns its fields as a 13-element row.
/// </summary>
/// <remarks>
/// Stores <paramref name="subject"/> and three nodes located in <paramref name="body"/>
/// (sender table, product heading, contact heading) in instance fields, then fills the row
/// from the <c>Get*</c> helpers. Slot 0 is the integer 0 and the last two slots are empty strings.
/// </remarks>
/// <param name="subject">Mail subject.</param>
/// <param name="body">Mail body as HTML.</param>
/// <returns>An array of 13 values in fixed order.</returns>
public object[] Parse(string subject, string body)
{
    this.subject = subject;

    var document = new HtmlDocument();
    document.LoadHtml(body);
    var root = document.DocumentNode;

    this.senderInfo = root.SelectSingleNode(@"//body/table/tr[2]/td/div/table[2]");
    this.productInfo = root.SelectSingleNode(@"//body/table/tr[2]/td/h3");
    this.cantactInfo = root.SelectSingleNode(@"//body/table/tr[2]/td/div/table/tr/td[2]/h4");

    return new object[]
    {
        0,
        GetMsgIp(),
        GetOrigin(),
        GetProduct(),
        GetName(),
        GetMail(),
        GetCountry(),
        GetTelephone(),
        GetCompany(),
        GetAddress(),
        GetFax(),
        string.Empty,
        string.Empty
    };
}
/// <summary>
/// Collects the space-separated words of all text nodes at or below <paramref name="node"/>.
/// </summary>
/// <remarks>
/// Nodes with children are traversed recursively. A childless text node is split on single
/// spaces (empty entries dropped) and each piece becomes an <c>HtmlWord</c> built from the piece,
/// the text node's parent and <paramref name="top"/>.
/// </remarks>
/// <returns>A read-only list of the words found.</returns>
public static IList<HtmlWord> GetWords(this HtmlNode node, HtmlNode top)
{
    var collected = new List<HtmlWord>();

    if (!node.HasChildNodes)
    {
        var leaf = node as HtmlTextNode;
        if (leaf != null && !string.IsNullOrEmpty(leaf.Text))
        {
            string[] pieces = leaf.Text.Split(new string[] { " " }, StringSplitOptions.RemoveEmptyEntries);
            foreach (string piece in pieces)
            {
                collected.Add(new HtmlWord(piece, node.ParentNode, top));
            }
        }

        return collected.AsReadOnly();
    }

    foreach (var child in node.ChildNodes)
    {
        collected.AddRange(child.GetWords(top));
    }

    return collected.AsReadOnly();
}
/// <summary>
/// Returns the first <c>a</c> element matched by the XPath <c>a</c> relative to
/// <paramref name="node"/>, or <c>null</c> when there is none.
/// </summary>
public HtmlNode ParseLink(HtmlNode node)
{
    return node.SelectSingleNode("a");
}
/// <summary>
/// Loads the page at <paramref name="url"/> and locates its body and first <c>div</c>.
/// </summary>
/// <remarks>
/// The <paramref name="url"/> argument used to be ignored in favour of a hard-coded address.
/// It is now honoured; the previous address remains the fallback when the argument is null or blank.
/// The located nodes are not used further yet.
/// </remarks>
/// <param name="url">Address of the page to load.</param>
public static void LoadFromUrl(string url)
{
    const string DefaultUrl = "http://brico-direct.tn/63-quincaillerie-en-ligne?id_category=63&n=275";
    string target = string.IsNullOrWhiteSpace(url) ? DefaultUrl : url;

    HtmlWeb web = new HtmlWeb();
    HtmlDocument doc = web.Load(target);

    // ParseErrors contains any errors from the Load call.
    if (doc.ParseErrors != null && doc.ParseErrors.Any())
    {
        // Handle any parse errors as required
        return;
    }

    if (doc.DocumentNode == null)
    {
        return;
    }

    HtmlAgilityPack.HtmlNode bodyNode = doc.DocumentNode.SelectSingleNode("//body");
    if (bodyNode == null)
    {
        return;
    }

    // Select the first div in the document.
    HtmlNode firstDivNode = doc.DocumentNode.SelectSingleNode("//div");
}
/// <summary>
/// Parses <paramref name="container"/> as HTML and returns the inner HTML of the first node
/// matching <c>"//" + selector</c>.
/// </summary>
/// <remarks>
/// When the parser reports errors, the first error's reason is logged and an empty string is
/// returned. A successful match is echoed to the console and its outer HTML is logged as a warning.
/// </remarks>
/// <param name="container">HTML text to search.</param>
/// <param name="selector">XPath fragment appended to <c>//</c>.</param>
/// <returns>The inner HTML of the match, or an empty string when nothing is found.</returns>
public string GetPropertyBySeletor(string container, string selector)
{
    HtmlDocument parsed = new HtmlAgilityPack.HtmlDocument();
    parsed.LoadHtml(container);

    if (parsed.ParseErrors != null && parsed.ParseErrors.Any())
    {
        LogApplication.Agent.LogError(parsed.ParseErrors.First().Reason);
        return string.Empty;
    }

    if (parsed.DocumentNode == null)
    {
        return string.Empty;
    }

    HtmlAgilityPack.HtmlNode match = parsed.DocumentNode.SelectSingleNode("//" + selector);
    if (match == null)
    {
        return string.Empty;
    }

    string inner = match.InnerHtml;
    Console.WriteLine("-->>>>>>>>>>>>>>>>>>>>>>>" + inner + "<<<<<<<<<<<<<<<<<<<<<<<<<<---");
    LogApplication.Agent.LogWarn(match.OuterHtml);
    return inner;
}
/// <summary>
/// Reads one psalm starting at node <paramref name="at"/> by walking its following siblings.
/// </summary>
/// <param name="at">First node; its text is returned in <paramref name="a"/>.</param>
/// <param name="a">Inner text of <paramref name="at"/>.</param>
/// <param name="n">Inner text of the next sibling.</param>
/// <param name="c">
/// Inner text of the sibling after that when it is not a <c>div</c>; otherwise an empty string.
/// </param>
/// <param name="t">Result of <c>ParsePsalm</c> for the psalm text node.</param>
/// <returns>The node passed to <c>ParsePsalm</c>.</returns>
private static HtmlAgilityPack.HtmlNode GetPsalmSext(HtmlAgilityPack.HtmlNode at, out string a, out string n, out string c, out string t)
{
    a = at.InnerText;

    var secondNode = at.NextSibling;
    n = secondNode.InnerText;

    var thirdNode = secondNode.NextSibling;
    HtmlAgilityPack.HtmlNode textNode;
    if (thirdNode.Name == "div")
    {
        // The third node already is the text block; there is no intermediate caption.
        c = string.Empty;
        textNode = thirdNode;
    }
    else
    {
        c = thirdNode.InnerText;
        textNode = FindNode(thirdNode, "div");
    }

    if (textNode == null)
    {
        // NOTE(review): blocks for a key press when no div was found, then continues
        // with a null node - looks like a debugging aid; confirm before removing.
        Console.ReadKey();
    }

    t = ParsePsalm(textNode);
    return textNode;
}
/// <summary>
/// Reads one psalm starting at node <paramref name="at"/> by walking its following siblings.
/// </summary>
/// <param name="at">First node; its text is returned in <paramref name="a"/>.</param>
/// <param name="a">Inner text of <paramref name="at"/>.</param>
/// <param name="n">Inner text of the next sibling.</param>
/// <param name="c">
/// Text of up to two consecutive <c>p</c> siblings that follow, joined with <c>" | "</c>;
/// empty when the third node is not a <c>p</c>.
/// </param>
/// <param name="t">Result of <c>ParsePsalm</c> for the node after those paragraphs.</param>
/// <returns>The node passed to <c>ParsePsalm</c>.</returns>
private static HtmlAgilityPack.HtmlNode GetPsalm(HtmlAgilityPack.HtmlNode at, out string a, out string n, out string c, out string t)
{
    a = at.InnerText;

    var secondNode = at.NextSibling;
    n = secondNode.InnerText;

    var cursor = secondNode.NextSibling;
    c = string.Empty;

    if (cursor.Name == "p")
    {
        c = cursor.InnerText;
        cursor = cursor.NextSibling;

        if (cursor.Name == "p")
        {
            c = c + " | " + cursor.InnerText;
            cursor = cursor.NextSibling;
        }
    }

    t = ParsePsalm(cursor);
    return cursor;
}
/// <summary>
/// Extracts date, home team, away team and score from one match row.
/// </summary>
/// <remarks>
/// The date cell is expected as <c>dd/MM/yy</c> and is returned re-ordered as <c>MM/dd/20yy</c>.
/// A missing cell results in a <see cref="NullReferenceException"/>, as before.
/// </remarks>
/// <param name="MatchListItem">Row node containing <c>td</c> cells classed date, team-a, team-b and score-time.</param>
/// <returns>Tuple of (date, home team, away team, score).</returns>
public Tuple<string, string, string, string> GetMatchData(HtmlAgilityPack.HtmlNode MatchListItem)
{
    string rawDate = GetMatchCellText(MatchListItem, "date");
    int firstSlash = rawDate.IndexOf('/');
    int lastSlash = rawDate.LastIndexOf('/');

    string day = rawDate.Substring(0, firstSlash);
    string month = rawDate.Substring(firstSlash + 1, 2);
    string year = rawDate.Substring(lastSlash + 1, 2);
    string date = month + "/" + day + "/" + "20" + year;

    string homeTeam = GetMatchCellText(MatchListItem, "team-a");
    string awayTeam = GetMatchCellText(MatchListItem, "team-b");
    string score = GetMatchCellText(MatchListItem, "score-time");

    return new Tuple<string, string, string, string>(date, homeTeam, awayTeam, score);
}

/// <summary>
/// Returns the trimmed text of the first <c>td</c> below <paramref name="row"/> whose class
/// attribute contains <paramref name="classFragment"/>.
/// </summary>
private static string GetMatchCellText(HtmlAgilityPack.HtmlNode row, string classFragment)
{
    return row.Descendants("td")
        .Where(cell => cell.GetAttributeValue("class", "").Contains(classFragment))
        .FirstOrDefault()
        .InnerText
        .Trim();
}
/// <summary>
/// Searches hkexnews for today's announcements of <paramref name="id"/> and returns the
/// absolute URL of the first PDF link on the result page.
/// </summary>
/// <remarks>
/// The POST body is a format template read from <c>Config\HK\HKFMAndBulkFileGenerator.txt</c>,
/// filled with today's date (whole and split), and the id. The call sleeps for two seconds
/// before the request is sent.
/// </remarks>
/// <param name="id">Identifier inserted into the search form.</param>
/// <returns>The PDF URL, or <c>null</c> (after logging) when the page has no PDF link.</returns>
public string GetPDFUrl(string id)
{
    string searchUrl = "http://www.hkexnews.hk/listedco/listconews/advancedsearch/search_active_main.aspx";

    string stamp = DateTime.Now.ToString("yyyyMMdd");
    string year = stamp.Substring(0, 4);
    string month = stamp.Substring(4, 2);
    string day = stamp.Substring(6, 2);

    string template = File.ReadAllText(@"Config\HK\HKFMAndBulkFileGenerator.txt", Encoding.Default);
    string postData = string.Format(template, stamp, id, day, month, year);

    Thread.Sleep(2000);

    HtmlDocument resultPage = GetDocFromHK(searchUrl, postData);
    HtmlAgilityPack.HtmlNode pdfLink = resultPage.DocumentNode.SelectSingleNode("//a[contains(@href, '.pdf')]");

    if (pdfLink != null)
    {
        return "http://www.hkexnews.hk" + pdfLink.Attributes["href"].Value;
    }

    Logger.Log("There's no PDF file for ric " + id);
    return null;
}
/// <summary>
/// Reads the <c>value</c> attribute of every enabled e-mail header element.
/// </summary>
/// <remarks>
/// For each entry of <c>salesForce.emailHeaderIdentities</c> whose flag is true, the element with
/// that id is looked up in <paramref name="source"/>. Elements that are missing from the document
/// or have no <c>value</c> attribute are skipped.
/// </remarks>
/// <param name="source">Document to search.</param>
/// <param name="salesForce">Supplies the element ids and their enabled flags.</param>
/// <returns>Map from element id to a record holding the attribute value as e-mail address.</returns>
private static Dictionary<string, EmailRecord> getNameValueByElementType(
    HtmlAgilityPack.HtmlDocument source,
    SalesForce salesForce
    )
{
    Dictionary<string, EmailRecord> output = new Dictionary<string, EmailRecord>();

    foreach (KeyValuePair<string, bool> emailItem in salesForce.emailHeaderIdentities)
    {
        if (!emailItem.Value)
        {
            continue;
        }

        // GetElementbyId returns null when the id is not present in the document.
        HtmlAgilityPack.HtmlNode editNode = source.GetElementbyId(emailItem.Key);
        if (editNode == null)
        {
            continue;
        }

        HtmlAgilityPack.HtmlAttribute attribute = editNode.Attributes["value"];
        if (attribute == null)
        {
            continue;
        }

        EmailRecord record = new EmailRecord();
        record.emailAddress = attribute.Value;
        output.Add(emailItem.Key, record);
    }

    return output;
}
/// <summary>
/// Hands each direct child of <paramref name="node"/> to <c>ProcessNode</c>, in collection order.
/// </summary>
/// <param name="node">Node whose children are processed.</param>
/// <param name="outText">Writer passed through to <c>ProcessNode</c>.</param>
private static void ProcessContent(HtmlNode node, TextWriter outText)
{
    foreach (HtmlNode childNode in node.ChildNodes)
    {
        ProcessNode(childNode, outText);
    }
}
/// <summary>
/// Lazily yields the text of every anchor below <paramref name="container"/> whose href starts
/// with <c>http://fr.feedbooks.com/item/</c>.
/// </summary>
/// <remarks>
/// Example of a matching anchor:
/// <c>&lt;a href="http://fr.feedbooks.com/item/316137/les-les-de-l-espace" itemprop="url"&gt;Les Îles de l'espace&lt;/a&gt;</c>
/// </remarks>
private static IEnumerable<string> ExtractTitles(HtmlNode container)
{
    return container.Descendants("a")
        .Where(anchor => anchor.GetAttributeValue("href", "").StartsWith("http://fr.feedbooks.com/item/"))
        .Select(anchor => anchor.InnerText);
}
/// <summary>
/// Appends a bordered, right-aligned row with two or three cells to <paramref name="table"/>.
/// </summary>
/// <param name="table">Table node that receives the new row.</param>
/// <param name="s1">Inner HTML of the first cell.</param>
/// <param name="s2">Inner HTML of the second cell.</param>
/// <param name="s3">Inner HTML of the optional third cell; no cell is added when null or empty.</param>
public static void AddColumnRowToTable(HtmlAgilityPack.HtmlNode table, string s1, string s2, string s3)
{
    var owner = table.OwnerDocument;

    // NOTE(review): the same attribute instances are added to several nodes below,
    // exactly as before; confirm that sharing them is intended.
    HtmlAttribute borderStyle = owner.CreateAttribute("style", "border:1px solid black;border-collapse:collapse;");

    HtmlNode newRow = owner.CreateElement("tr");
    newRow.Attributes.Add(owner.CreateAttribute("valign", "top"));

    HtmlNode firstCell = owner.CreateElement("td");
    HtmlNode secondCell = owner.CreateElement("td");
    HtmlAttribute alignRight = owner.CreateAttribute("align", "right");

    newRow.Attributes.Add(borderStyle);
    firstCell.Attributes.Add(borderStyle);
    secondCell.Attributes.Add(borderStyle);
    firstCell.Attributes.Add(alignRight);
    secondCell.Attributes.Add(alignRight);

    firstCell.InnerHtml = s1;
    secondCell.InnerHtml = s2;
    newRow.ChildNodes.Add(firstCell);
    newRow.ChildNodes.Add(secondCell);

    if (!string.IsNullOrEmpty(s3))
    {
        HtmlNode thirdCell = owner.CreateElement("td");
        thirdCell.Attributes.Add(borderStyle);
        thirdCell.Attributes.Add(alignRight);
        thirdCell.InnerHtml = s3;
        newRow.ChildNodes.Add(thirdCell);
    }

    table.ChildNodes.Add(newRow);
}
/// <summary>
/// Wraps <paramref name="linkNode"/> in a new <c>div</c> with class <c>regionMain fullWidth</c>.
/// </summary>
/// <param name="linkNode">Node appended as child of the new div.</param>
/// <returns>The new wrapper node.</returns>
private static HtmlNode CreateContentHeaderNode(HtmlNode linkNode)
{
    HtmlNode wrapper = HtmlNode.CreateNode("<div />");
    wrapper.SetAttributeValue("class", "regionMain fullWidth");
    wrapper.AppendChild(linkNode);
    return wrapper;
}
/// <summary>
/// Extracts every child section of <paramref name="config"/> and stores the results in
/// <paramref name="container"/> under the child's name.
/// </summary>
/// <remarks>
/// Results that are a <c>JObject</c> or <c>JArray</c> with no entries are left out; every other
/// result is stored as is.
/// </remarks>
private void ExtractChildren(ConfigSection config, HtmlAgilityPack.HtmlNode parentNode, JObject container, List<HtmlAgilityPack.HtmlNode> logicalParents)
{
    foreach (var entry in config.Children)
    {
        var name = entry.Key;
        var extracted = this.Extract(name: name, config: entry.Value, parentNode: parentNode, logicalParents: logicalParents);

        var asObject = extracted as JObject;
        if (asObject != null)
        {
            if (asObject.Count > 0)
            {
                container[name] = (JToken)extracted;
            }

            continue;
        }

        var asArray = extracted as JArray;
        if (asArray != null)
        {
            if (asArray.Count > 0)
            {
                container[name] = (JToken)extracted;
            }

            continue;
        }

        container[name] = (JToken)extracted;
    }
}
/// <summary>
/// Initializes the field from a node whose cleaned text has the form <c>actual,max</c>.
/// </summary>
/// <param name="node">
/// Node whose inner text is passed through <c>Utils.RemoveAllNotNumberCharacters</c> and then
/// split on commas; the first two parts are parsed as integers.
/// </param>
public Field(HtmlNode node)
{
    String[] parts = Utils.RemoveAllNotNumberCharacters(node.InnerText).Split(',');
    this.actual = int.Parse(parts[0]);
    this.max = int.Parse(parts[1]);
}
/// <summary>
/// Returns the date labels of a schedule page: a fixed-up header entry followed by the text of
/// every date separator (elements with class <c>w2g</c>).
/// </summary>
/// <remarks>
/// The first entry is made of the first three whitespace-separated parts of the page header
/// plus the last part of the final separator's text. A separator looks like
/// "2019. március 3. vasárnap"; per the original note there are typically seven on a page.
/// </remarks>
/// <param name="pageresult">Parsed schedule page.</param>
/// <returns>Header entry followed by all separator texts.</returns>
private static List<string> GetdateSeparatorList(HtmlAgilityPack.HtmlDocument pageresult)
{
    string separatorXPath = "//*[contains(concat( \" \", @class, \" \" ), concat( \" \", \"w2g\", \" \" ))]";
    string headerXPath = "//*[@id=\"ctl00_C_p\"]/div[@class=\"tvhead\"]/div[@class=\"tvheadtitle\"]/h2[@class=\"tvh2\"]";

    HtmlAgilityPack.HtmlNodeCollection separators = pageresult.DocumentNode.SelectNodes(separatorXPath);
    HtmlAgilityPack.HtmlNode header = pageresult.DocumentNode.SelectSingleNode(headerXPath);

    string[] headerParts = header.InnerText.Split();
    string[] lastSeparatorParts = separators[separators.Count - 1].InnerText.Split();

    // Header date (first three parts) followed by the trailing word of the last separator.
    string firstEntry = string.Join(
        " ",
        String.Join(" ", headerParts.Take(3).ToArray()),
        lastSeparatorParts[lastSeparatorParts.Length - 1]);

    List<string> labels = new List<string>();
    labels.Add(firstEntry);
    foreach (HtmlAgilityPack.HtmlNode separator in separators)
    {
        labels.Add(separator.InnerText);
    }

    return labels;
}
/// <summary>
/// Walks from <paramref name="root"/> to a fixed position in the tree and returns that node's
/// children after filtering them through <c>removeTextNodes</c>.
/// </summary>
/// <remarks>
/// The position is the sixth child (index 5) of the node just before the root's last child.
/// </remarks>
private HtmlNodeCollection NavigateToSubstitutionTableAndRemoveTextNodex(HtmlNode root)
{
    HtmlNode beforeLast = root.LastChild.PreviousSibling;
    HtmlNode tableHolder = beforeLast.ChildNodes[5];
    return removeTextNodes(tableHolder.ChildNodes);
}
/// <summary>
/// Like <c>SelectNodes</c>, but returns an empty collection owned by <paramref name="node"/>
/// instead of <c>null</c> when nothing matches.
/// </summary>
/// <param name="node">Context node; validated with <c>Requires.NonNull</c>.</param>
/// <param name="xpath">XPath expression to evaluate.</param>
public static H.HtmlNodeCollection SelectNodesOrEmpty(this H.HtmlNode node, string xpath)
{
    Requires.NonNull(node, nameof(node));

    H.HtmlNodeCollection matches = node.SelectNodes(xpath);
    if (matches != null)
    {
        return matches;
    }

    return new H.HtmlNodeCollection(node);
}
/// <summary>
/// Determines whether an <c>li</c> node represents a vote.
/// </summary>
/// <remarks>
/// A vote has exactly three child nodes and the first child's class is one of the known
/// voting flags.
/// </remarks>
/// <param name="LiNode">The list item to inspect.</param>
/// <returns><c>true</c> for a vote entry; otherwise <c>false</c>.</returns>
public bool isLINodeaVote(HtmlAgilityPack.HtmlNode LiNode)
{
    if (!LiNode.HasChildNodes || LiNode.ChildNodes.Count != 3)
    {
        return false;
    }

    string flagClass = LiNode.ChildNodes.First().GetAttributeValue("class", "not found");

    // All the possible types of voting.
    return flagClass == "flag yes"
        || flagClass == "flag no"
        || flagClass == "flag not-logged-in"
        || flagClass == "flag refrained"
        || flagClass == "flag excused";
}
/// <summary>
/// Replaces <paramref name="el"/> in its parent with a node created from its own inner text.
/// </summary>
/// <param name="el">Node to replace; it must have a parent.</param>
/// <returns>The newly created replacement node.</returns>
public static HAP.HtmlNode ReplaceWithInnerText(this HAP.HtmlNode el)
{
    HAP.HtmlNode replacement = HAP.HtmlNode.CreateNode(el.InnerText);
    HAP.HtmlNode parent = el.ParentNode;
    parent.ReplaceChild(replacement, el);
    return replacement;
}
/// <summary>
/// Builds a <c>Result</c> from one result row and the current course header.
/// </summary>
/// <remarks>
/// <c>_currentCourse</c> is split on <c>&lt;br&gt;</c> into course name and event name. The row's
/// first four <c>td</c> cells supply position, horse, jockey and starting price, each read from
/// the cell's <c>b</c> element. When the price text is rejected with an
/// <see cref="ArgumentException"/>, an all-zero price is used instead.
/// </remarks>
/// <param name="resultSet">Row node containing the result cells.</param>
private Result ParseResult(HtmlNode resultSet)
{
    string[] courseSplit = _currentCourse.Split(new[] { "<br>" }, StringSplitOptions.RemoveEmptyEntries);
    string courseName = courseSplit[0];
    string eventName = courseSplit[1];

    var doc = new HtmlDocument();
    doc.LoadHtml(resultSet.OuterHtml);

    // Materialize once: the selector result is a lazy sequence and ElementAt would
    // re-run the query for every cell.
    List<HtmlNode> cells = doc.DocumentNode.QuerySelectorAll("td").ToList();

    string position = cells[0].QuerySelector("b").InnerText;
    string name = cells[1].QuerySelector("b").InnerText;
    string jockey = cells[2].QuerySelector("b").InnerText;
    string startingPrice = cells[3].QuerySelector("b").InnerText.Trim();

    var price = new Price { DecimalPrice = 0, Denominator = 0, Numerator = 0 };
    try
    {
        price = new Price(startingPrice);
    }
    catch (ArgumentException)
    {
        // Deliberate best effort: keep the all-zero price when the text is not a valid price.
    }

    return new Result
    {
        CourseName = courseName,
        EventName = eventName,
        Position = position,
        HorseName = name,
        JockeyName = jockey,
        StartingPriceDecimal = price.DecimalPrice,
        StartingPriceDenominator = price.Denominator,
        StartingPriceNumerator = price.Numerator
    };
}
/// <summary>
/// Opens the properties form for the currently active element of the HTML editor.
/// </summary>
/// <remarks>
/// The editor document is converted to an HtmlAgilityPack document and the active element is
/// looked up by id. The <c>moduleChanged</c> handler is removed before it is added so that
/// repeated clicks do not stack up duplicate subscriptions.
/// </remarks>
private void moduleProperties_Click(object sender, EventArgs e)
{
    HtmlAgilityPack.HtmlDocument doc = HTMLDocumentConverter.mshtmlDocToAgilityPackDoc(htmlEditor1.HtmlDocument2);
    HtmlAgilityPack.HtmlNode elem = doc.GetElementbyId(this.activeElement.id);

    // Removing a handler that is not subscribed is a no-op, so this is safe on the first click.
    CFormController.Instance.mainForm.propertiesForm.moduleChanged -= new ModuleChanged(propertiesForm_moduleChanged);
    CFormController.Instance.mainForm.propertiesForm.moduleChanged += new ModuleChanged(propertiesForm_moduleChanged);

    CFormController.Instance.mainForm.showProperties(elem);
}
/// <summary>
/// Builds a market rate entry from a link node of the ifeng quote page.
/// </summary>
/// <remarks>
/// The node's <c>href</c> selects the index: the sh000001 URL maps to
/// <c>RateType.StockShangzheng</c>, the sz399001 URL to <c>RateType.StockShenzhen</c>; any other
/// URL yields <c>null</c>. A node without <c>href</c> is treated as the sh000001 link because that
/// URL is the attribute default. The rate is the first space-separated token of the text of the
/// node two siblings further on. Creation time and rate day come from one clock reading.
/// </remarks>
/// <param name="monthNode">Link node identifying the index.</param>
/// <returns>The populated model, or <c>null</c> for an unrecognised link.</returns>
private MarketRateModel BuildMarketRate(HtmlNode monthNode)
{
    const string ShanghaiUrl = "http://finance.ifeng.com/app/hq/stock/sh000001/";
    const string ShenzhenUrl = "http://finance.ifeng.com/app/hq/stock/sz399001/";

    var result = new MarketRateModel();
    var dataCode = monthNode.GetAttributeValue("href", ShanghaiUrl);

    if (dataCode == ShanghaiUrl)
    {
        result.Type = RateType.StockShangzheng;
    }
    else if (dataCode == ShenzhenUrl)
    {
        result.Type = RateType.StockShenzhen;
    }
    else
    {
        return null;
    }

    string value = monthNode.NextSibling.NextSibling.InnerText.Trim();
    value = value.Split(' ')[0];
    result.Rate = decimal.Parse(value);

    // Read the clock once so CreateTime and RateDay cannot fall on different days.
    DateTime now = DateTime.Now;
    result.CreateTime = now;
    result.RateDay = now.Date;
    result.Source = SourceType.eIfeng;
    return result;
}
/// <summary>
/// Requests a dianping search page and writes name, review count and price of each listed shop
/// to the response.
/// </summary>
/// <remarks>
/// The downloaded markup is loaded into the document before it is queried; previously the
/// document stayed empty and the query result was null. Entries lacking any of the expected
/// nodes are skipped.
/// NOTE(review): the request headers embed a fixed cookie; confirm that this is intended.
/// </remarks>
protected void Page_Load(object sender, EventArgs e)
{
    string heads = @"Accept: application/json, text/javascript, */* q=0.01 " +
        @"Accept-Encoding: gzip, deflate " +
        @"Accept-Language: zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2 " +
        @"Connection: keep-alive " +
        @"Cookie: s_ViewType=10; _lxsdk_cuid=1729cd29d3dc8-04d80d1c3b31398-4c302c7d-144000-1729cd29d3ec8; _lxsdk=1729cd29d3dc8-04d80d1c3b31398-4c302c7d-144000-1729cd29d3ec8; _hc.v=6c48a318-c117-5df7-478a-f0f694f1570e.1591768948; Hm_lvt_602b80cf8079ae6591966cc70a3940e7=1591768950,1591788446; _lxsdk_s=1729dfc18eb-4f6-3ef-94c%7C%7C19; Hm_lpvt_602b80cf8079ae6591966cc70a3940e7=1591788446 " +
        @"Host: catdot.dianping.com " +
        @"Referer: http:/www.dianping.com/search…/0_%E8%8B%B1%AF%AD%E5%9F%B9%AE " +
        @"User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:77.0) Gecko/20100101 Firefox/77.0";
    string url = @"http://www.dianping.com/search/keyword/1/0_%E8%8B%B1%AF%AD%E5%9F%B9%AE";

    ClassHttpRequestClient s = new ClassHttpRequestClient(true);
    string content = "";
    string response = s.httpPost(url, heads, content, Encoding.UTF8);

    HtmlDocument doc = new HtmlDocument();
    doc.LoadHtml(response ?? string.Empty);

    StringBuilder sb = new StringBuilder();

    // SelectNodes returns null (not an empty collection) when nothing matches.
    HtmlAgilityPack.HtmlNodeCollection collection = doc.DocumentNode.SelectNodes("//div[@class=\"txt\"]");
    if (collection != null)
    {
        foreach (HtmlAgilityPack.HtmlNode item in collection)
        {
            HtmlAgilityPack.HtmlNode divtit = item.SelectSingleNode("div[@class=\"txt\"]");
            HtmlAgilityPack.HtmlNode divcomment = item.SelectSingleNode("div[@class=\"comment\"]");
            if (divtit == null || divcomment == null)
            {
                continue;
            }

            HtmlAgilityPack.HtmlNode aname = divtit.SelectSingleNode("a[1]");
            HtmlAgilityPack.HtmlNode anum = divcomment.SelectSingleNode("a[1]");
            HtmlAgilityPack.HtmlNode aprice = divcomment.SelectSingleNode("a[2]");
            if (aname == null || anum == null || aprice == null)
            {
                continue;
            }

            sb.Append(string.Format("{0}—{1}—{2}", aname.InnerText, anum.InnerText, aprice.InnerText));
        }
    }

    Response.Write(sb);
}
/// <summary>
/// Extracts the target address from the <c>un="..."</c> value inside the element's inner HTML
/// and returns it as an absolute URI string.
/// </summary>
/// <param name="url">Not read; the address is taken from <paramref name="elem"/>.</param>
/// <param name="elem">Element whose inner HTML carries the <c>un</c> value.</param>
/// <returns>The absolute URI; <c>http://</c> is prefixed when the value does not start with <c>http</c>.</returns>
protected override string OnScrape(string url, HtmlNode elem)
{
    string target = SubstringBetween(elem.InnerHtml, "un=\"", "\"");

    if (!target.StartsWith("http"))
    {
        target = "http://" + target;
    }

    return new Uri(target).AbsoluteUri;
}
/// <summary>
/// Sample routine: parses the given text with HtmlAgilityPack and locates the <c>body</c> node.
/// </summary>
/// <param name="filePath">
/// Passed to <c>LoadHtml</c>, i.e. treated as HTML text.
/// NOTE(review): the name suggests a file path and a <c>Load(filePath)</c> call was commented
/// out in the original - confirm which one is intended.
/// </param>
public void HTMLAgilityPack(string filePath)
{
    var document = new HtmlAgilityPack.HtmlDocument();

    // There are various options, set as needed.
    document.OptionFixNestedTags = true;

    // LoadHtml parses a string; Load(path) would read a file instead.
    document.LoadHtml(filePath);

    // ParseErrors contains any errors reported while loading.
    bool hasParseErrors = document.ParseErrors != null && document.ParseErrors.Any();
    if (hasParseErrors)
    {
        // Handle any parse errors as required
        return;
    }

    if (document.DocumentNode == null)
    {
        return;
    }

    HtmlAgilityPack.HtmlNode body = document.DocumentNode.SelectSingleNode("//body");
    if (body != null)
    {
        // Do something with the body node
    }
}
/// <summary>
/// Creates an opening-tag token for <paramref name="node"/>.
/// </summary>
/// <param name="id">Token id, forwarded to the base constructor.</param>
/// <param name="node">Source node; only its name is copied.</param>
/// <param name="properties">Text properties stored on the token.</param>
/// <param name="parentID">Id stored as the token's parent id.</param>
public TagOpenToken(int id, HtmlNode node, TextVisualProperties properties, int parentID)
    : base(id)
{
    this.Name = node.Name;
    this.TextProperties = properties;
    this.ParentID = parentID;
}
/// <summary>
/// Downloads the versions file page from GitHub and reports through
/// <c>NewVersionUpdateExist</c> whether <paramref name="currentVersion"/> is the latest one.
/// </summary>
/// <remarks>
/// The first whitespace-separated token of the rendered file table is taken as the latest
/// version. Nothing is reported when the event has no subscribers.
/// </remarks>
/// <param name="currentVersion">Version string of the running application.</param>
public void UpdateChecker(string currentVersion)
{
    string github = "https://github.com/hanel2527/dcinisde-crawler.ver.2/blob/master/versions.txt";

    string text;
    // WebClient is IDisposable; release it once the page has been downloaded.
    using (var client = new WebClient())
    {
        client.Encoding = System.Text.Encoding.UTF8;
        text = client.DownloadString(github);
    }

    hap.HtmlDocument doc = new hap.HtmlDocument();
    doc.LoadHtml(text);

    hap.HtmlNode myVersions = doc.DocumentNode.
        SelectSingleNode("//table[@class='highlight tab-size js-file-line-container']");
    text = myVersions.InnerText.Trim();

    string[] versions = text.Split(new[] { ' ', '\r', '\n', '\t' }, StringSplitOptions.RemoveEmptyEntries);

    if (NewVersionUpdateExist != null)
    {
        if (versions[0].Equals(currentVersion))
        {
            NewVersionUpdateExist("최신 버전입니다: " + versions[0], null);
        }
        else
        {
            NewVersionUpdateExist("새로운 업데이트가 있습니다(클릭): " + versions[0], null);
        }
    }
}
/// <summary>
/// Searches hkexnews for announcements of <paramref name="id"/> and returns the absolute URL of
/// the first PDF link on the result page.
/// </summary>
/// <param name="id">Identifier used to build the POST data via <c>getPostData</c>.</param>
/// <returns>
/// The PDF URL; <c>null</c> (after logging) when the page has no PDF link; an empty string when
/// the request or parsing failed, in which case the exception is logged.
/// </returns>
public string GetPDFUrl(string id)
{
    string pdfUrl = string.Empty;
    string postData = getPostData(id);
    string searchUrl = "http://www.hkexnews.hk/listedco/listconews/advancedsearch/search_active_main.aspx";

    try
    {
        string pageSource = WebClientUtil.GetPageSource(searchUrl, 24000, postData);
        HtmlAgilityPack.HtmlDocument htmlDoc = new HtmlAgilityPack.HtmlDocument();
        htmlDoc.LoadHtml(pageSource);

        HtmlAgilityPack.HtmlNode pdfLinkNode = htmlDoc.DocumentNode.SelectSingleNode("//a[contains(@href, '.pdf')]");
        if (pdfLinkNode == null)
        {
            Logger.Log("There's no PDF file for ric " + id);
            return null;
        }

        pdfUrl = "http://www.hkexnews.hk" + pdfLinkNode.Attributes["href"].Value;
    }
    catch (Exception ex)
    {
        // Still best effort (the empty string is returned), but leave a trace instead of
        // discarding the failure silently.
        Logger.Log("Failed to get PDF url for ric " + id + ": " + ex);
    }

    return pdfUrl;
}
/// <summary>
/// Returns the number of results Google reports for <paramref name="phrase"/>.
/// </summary>
/// <remarks>
/// The count is the first number found in the <c>resultStats</c> element; thousands separators
/// are removed before parsing. Counts without a separator (below 1000) used to be reported as 0
/// because parsing only happened when a comma was present.
/// </remarks>
/// <param name="phrase">Search phrase, appended to the query string as is.</param>
/// <returns>
/// The hit count; 0 when no whole number could be read; -1 on parse errors or when the page has
/// no <c>resultStats</c> element.
/// </returns>
long NumOfHits(string phrase)
{
    HtmlAgilityPack.HtmlWeb web = new HtmlAgilityPack.HtmlWeb();
    HtmlAgilityPack.HtmlDocument htmlDoc = web.Load("https://www.google.com/search?q=" + phrase);

    if (htmlDoc.ParseErrors != null && htmlDoc.ParseErrors.Count() > 0)
    {
        System.Console.WriteLine("error");
        debug.Print("error\n");
        return -1;
    }

    if (htmlDoc.DocumentNode == null)
    {
        return -1;
    }

    HtmlAgilityPack.HtmlNode node = htmlDoc.DocumentNode.SelectSingleNode("//div[@id='resultStats']");
    if (node == null)
    {
        // No statistics element on the page.
        return -1;
    }

    Regex re = new Regex(@"[1-9](?:\d{0,2})(?:,\d{3})*(?:\.\d*[1-9])?|0?\.\d*[1-9]|0");
    String result = re.Match(node.InnerHtml).Value;

    long hits;
    if (!long.TryParse(result.Replace(",", ""), out hits))
    {
        hits = 0;
    }

    return hits;
}
/// <summary>
/// Checks whether <paramref name="node"/> and everything below it consists only of text nodes
/// and elements whose name is in <c>allowedTags</c>.
/// </summary>
/// <returns>
/// <c>true</c> for a text node, or for an allowed element whose children all pass the same check;
/// <c>false</c> for any other node type or a disallowed element.
/// </returns>
private bool IsAllowedTypeRecursive(HtmlAgilityPack.HtmlNode node)
{
    switch (node.NodeType)
    {
        case HtmlAgilityPack.HtmlNodeType.Text:
            return true;
        case HtmlAgilityPack.HtmlNodeType.Element:
            break;
        default:
            return false;
    }

    if (!allowedTags.Contains(node.Name))
    {
        return false;
    }

    var descendants = node.ChildNodes;
    if (descendants == null)
    {
        return true;
    }

    foreach (var descendant in descendants)
    {
        if (!this.IsAllowedTypeRecursive(descendant))
        {
            return false;
        }
    }

    return true;
}
/// <summary>
/// Downloads the page at <paramref name="url"/> and returns the inner text of the element with
/// id <c>js-exportData</c>.
/// </summary>
/// <param name="url">Address to request with HTTP GET.</param>
/// <returns>
/// The element's text, or an empty string when the element is missing or the request failed
/// (in which case a message box with the error and the URL is shown).
/// </returns>
private string GetStringDataFormUrl(string url)
{
    try
    {
        HttpWebRequest myRequest = (HttpWebRequest)WebRequest.Create(url);
        myRequest.Method = "GET";

        string result;
        // using blocks release the response and reader even when reading throws.
        using (WebResponse myResponse = myRequest.GetResponse())
        using (StreamReader sr = new StreamReader(myResponse.GetResponseStream(), System.Text.Encoding.UTF8))
        {
            result = sr.ReadToEnd();
        }

        HtmlAgilityPack.HtmlDocument htmldoc = new HtmlAgilityPack.HtmlDocument();
        htmldoc.LoadHtml(result);

        HtmlAgilityPack.HtmlNode datanode = htmldoc.GetElementbyId("js-exportData");
        if (datanode != null)
        {
            return datanode.InnerText;
        }
    }
    catch (Exception e)
    {
        MessageBox.Show(e.Message + "\n" + url);
    }

    return "";
}
/// <summary>
/// Builds an <c>Activity</c> from one activity node.
/// </summary>
/// <remarks>
/// The name is the de-entitized, trimmed text of the first <c>div</c> with class
/// <c>action_prompt</c>. The note is the text of the first <c>li</c> with class
/// <c>stream_note</c>, if any. Every other <c>li</c> is converted with <c>ExtractSet</c>.
/// </remarks>
/// <param name="node">Node describing the activity.</param>
/// <param name="index">Value stored as the activity's sequence.</param>
/// <exception cref="InvalidDataException">No <c>action_prompt</c> div was found.</exception>
private static Activity ExtractActivity(HtmlNode node, int index)
{
    // NOTE(review): the first Replace argument may be meant to be a non-breaking space;
    // it is reproduced as it appears in the source - confirm.
    string activityName = node.Descendants("div")
        .Where(div => div.GetAttributeValue("class", null) == "action_prompt")
        .Select(div => HtmlEntity.DeEntitize(div.InnerText).Trim().Replace(" ", " "))
        .FirstOrDefault();

    if (activityName == null)
    {
        throw new InvalidDataException("Unable to find activity name");
    }

    var activity = new Activity();
    activity.Sequence = index;
    activity.Name = activityName;
    activity.Note = node.Descendants("li")
        .Where(li => li.GetAttributeValue("class", null) == "stream_note")
        .Select(li => HtmlEntity.DeEntitize(li.InnerText).Trim())
        .FirstOrDefault();
    activity.Sets = node.Descendants("li")
        .Where(li => li.GetAttributeValue("class", null) != "stream_note")
        .Select(ExtractSet)
        .ToList();
    return activity;
}
/// <summary>
/// Tests whether <paramref name="node"/> satisfies a CSS attribute selector.
/// </summary>
/// <remarks>
/// The selector value is compared after stripping surrounding spaces and quotes. Comparisons
/// are ordinal. <c>Hyphenated</c> (<c>[attr|=value]</c>) matches when the attribute equals the
/// value or starts with the value followed by a hyphen. <c>InList</c> (<c>[attr~=value]</c>)
/// matches when the value is one of the whitespace-separated words of the attribute.
/// </remarks>
/// <param name="node">Node whose attribute is inspected.</param>
/// <param name="attribute">Selector supplying attribute name, operator and value.</param>
/// <returns><c>true</c> when the attribute exists and satisfies the operator.</returns>
internal static bool IsMatch(this HtmlAgilityPack.HtmlNode node, ExCSS.Model.Attribute attribute)
{
    var attr = node.Attributes[attribute.Operand];
    if (attr == null)
    {
        return false;
    }

    var value = attr.Value;
    var test = (attribute.Value ?? "").Trim(' ', '\'', '"');

    switch (attribute.Operator)
    {
        case AttributeOperator.BeginsWith:
            return value.StartsWith(test, System.StringComparison.Ordinal);
        case AttributeOperator.Contains:
            return value.Contains(test);
        case AttributeOperator.EndsWith:
            return value.EndsWith(test, System.StringComparison.Ordinal);
        case AttributeOperator.Equals:
            return value == test;
        case AttributeOperator.Hyphenated:
            // "en" must match "en" and "en-US", but not "x-en".
            return value == test || value.StartsWith(test + "-", System.StringComparison.Ordinal);
        case AttributeOperator.InList:
            return value
                .Split(new[] { ' ', '\t', '\n', '\r', '\f' }, System.StringSplitOptions.RemoveEmptyEntries)
                .Contains(test);
        case AttributeOperator.None:
            return true;
        default:
            return false;
    }
}
/// <summary>
/// Fills status, icon, title, sender and date from the <c>td</c> cells of a message row.
/// </summary>
/// <remarks>
/// Status and icon are the <c>src</c> of the first image inside the cells classed <c>status</c>
/// and <c>icon</c>; title, sender and date are the inner text of the cells with those classes.
/// A missing cell or image results in a <see cref="System.NullReferenceException"/>.
/// </remarks>
/// <param name="rowNode">Row containing the cells.</param>
public void Parse(HtmlNode rowNode)
{
    Status = FindCellByClass(rowNode, "status").Descendants("img").FirstOrDefault().GetAttributeValue("src", string.Empty);
    Icon = FindCellByClass(rowNode, "icon").Descendants("img").FirstOrDefault().GetAttributeValue("src", string.Empty);
    Title = FindCellByClass(rowNode, "title").InnerText;
    Sender = FindCellByClass(rowNode, "sender").InnerText;
    Date = FindCellByClass(rowNode, "date").InnerText;
}

/// <summary>
/// Returns the first <c>td</c> below <paramref name="row"/> whose class attribute equals
/// <paramref name="cssClass"/>, or <c>null</c> when there is none.
/// </summary>
private static HtmlNode FindCellByClass(HtmlNode row, string cssClass)
{
    return row.Descendants("td")
        .FirstOrDefault(cell => cell.GetAttributeValue("class", string.Empty).Equals(cssClass));
}
/// <summary>
/// Parses HTML text (for example <c>response.ResponseHtml</c>) and returns the document's root node.
/// </summary>
/// <param name="chtml">HTML text to parse.</param>
/// <returns>The <c>DocumentNode</c> of the parsed document.</returns>
public static HtmlNode ToHtmlNode(string chtml)
{
    HtmlAgilityPack.HtmlDocument parsed = new HtmlDocument();
    parsed.LoadHtml(chtml);
    return parsed.DocumentNode;
}
/// <summary>
/// Maps one goalie bio table row to a <c>Nhl_Players_Bio_Goalie</c>.
/// </summary>
/// <remarks>
/// The row's direct <c>td</c> cells are read by position (0-20). Position is always <c>"G"</c>.
/// In the birth date cell, apostrophes are replaced by slashes before conversion.
/// </remarks>
/// <param name="row">Table row holding at least 21 cells.</param>
/// <param name="nhlSeasonType">Season type stored on the model.</param>
/// <param name="year">Season year stored on the model.</param>
private static Nhl_Players_Bio_Goalie MapHtmlRowToModel(HtmlNode row, NhlSeasonType nhlSeasonType, int year)
{
    HtmlNodeCollection cells = row.SelectNodes(@"./td");

    return new Nhl_Players_Bio_Goalie
    {
        NhlSeasonType = nhlSeasonType,
        Year = year,

        Number = ConvertStringToInt(cells[0].InnerText),
        Name = cells[1].InnerText,
        Team = cells[2].InnerText,
        Position = "G",
        DateOfBirth = Convert.ToDateTime(cells[3].InnerText.Replace("'", "/")),
        BirthCity = cells[4].InnerText,
        StateOrProvince = cells[5].InnerText,
        BirthCountry = cells[6].InnerText,
        HeightInches = ConvertStringToInt(cells[7].InnerText),
        WeightLbs = ConvertStringToInt(cells[8].InnerText),
        Catches = cells[9].InnerText,
        Rookie = cells[10].InnerText,
        DraftYear = ConvertStringToInt(cells[11].InnerText),
        DraftRound = ConvertStringToInt(cells[12].InnerText),
        DraftOverall = ConvertStringToInt(cells[13].InnerText),
        GamesPlayed = ConvertStringToInt(cells[14].InnerText),
        Wins = ConvertStringToInt(cells[15].InnerText),
        Losses = ConvertStringToInt(cells[16].InnerText),
        OTSOLosses = ConvertStringToInt(cells[17].InnerText),
        GAA = Convert.ToDouble(cells[18].InnerText),
        SavePercentage = Convert.ToDouble(cells[19].InnerText),
        Shutouts = ConvertStringToInt(cells[20].InnerText)
    };
}
/// <summary>
/// Adds the menu entries found in the first <c>td</c> below <paramref name="node"/> to
/// <paramref name="menu"/>.
/// </summary>
/// <remarks>
/// Nothing is added when there is no cell or the cell has no <c>div</c>. With more than two
/// <c>br</c> elements, every <c>&lt;br&gt;</c> in the cell is rewritten into a div boundary and
/// each resulting div becomes an entry. Otherwise one div is added per <c>br</c>, limited to the
/// number of divs that exist.
/// </remarks>
/// <param name="menu">Receives the entries in its <c>menu</c> list.</param>
/// <param name="node">Node containing the cell.</param>
public static void Classification(Menu menu, agi.HtmlNode node)
{
    // SelectNodes returns null (not an empty collection) when nothing matches.
    agi.HtmlNodeCollection divide_td = node.SelectNodes(".//td");
    if (divide_td == null || divide_td.Count == 0)
    {
        return;
    }

    agi.HtmlNode firstCell = divide_td[0];

    agi.HtmlNodeCollection check_div = firstCell.SelectNodes(".//div");
    if (check_div == null)
    {
        return;
    }

    agi.HtmlNodeCollection check_br = firstCell.SelectNodes(".//br");
    int count = check_br == null ? 0 : check_br.Count;

    if (count > 2)
    {
        String text = firstCell.InnerHtml;
        text = text.Replace("<br>", "</div><div>");
        firstCell.InnerHtml = text;

        agi.HtmlNodeCollection rebuilt = firstCell.SelectNodes(".//div");
        if (rebuilt == null)
        {
            return;
        }

        for (int i = 0; i < rebuilt.Count; i++)
        {
            menu.menu.Add(rebuilt[i].InnerText);
        }
    }
    else
    {
        // The loop is driven by the <br> count; never index past the divs that exist.
        int limit = Math.Min(count, check_div.Count);
        for (int i = 0; i < limit; i++)
        {
            menu.menu.Add(check_div[i].InnerText);
        }
    }
}
/// <summary>
/// Builds the summary table: total patients, all high-risk patients, new high-risk patients and
/// high-risk patients seen in cancer genetics, each with its share of the total.
/// </summary>
/// <remarks>
/// Each row shows the count and percentage of the same quantity. Previously the "All" row showed
/// the incidence count and the "New" row the prevalence percentage.
/// NOTE(review): assumes prevalence = all and incidence = new, as the row labels suggest - confirm.
/// </remarks>
/// <param name="node">Node handed to <c>UIUtils.CreateReportTableNode</c>.</param>
/// <returns>The populated table node.</returns>
private HtmlNode GetTable1Node(HtmlNode node)
{
    HtmlNode table = UIUtils.CreateReportTableNode(node);
    double total = (double)highrisk.denominator;

    UIUtils.AddColumnRowToTable(table, "Total Number of Patients", highrisk.denominator.ToString("#,###,###"), "");

    UIUtils.AddColumnRowToTable(table, "All High Risk BRCA Patients",
        high_risk_prevelance.ToString("#,###,###"),
        FormatShareOfTotal((double)high_risk_prevelance, total));

    UIUtils.AddColumnRowToTable(table, "New High Risk BRCA",
        high_risk_incidence.ToString("#,###,###"),
        FormatShareOfTotal((double)high_risk_incidence, total));

    UIUtils.AddColumnRowToTable(table, "All High Risk BRCA Seen In Cancer Genetics",
        high_risk_seenInRC.ToString("#,###,###"),
        FormatShareOfTotal((double)high_risk_seenInRC, total));

    return table;
}

/// <summary>
/// Formats <paramref name="part"/> as a whole-number percentage of <paramref name="whole"/>;
/// yields "0%" when the total is zero.
/// </summary>
private static string FormatShareOfTotal(double part, double whole)
{
    if (whole == 0)
    {
        return "0%";
    }

    int percent = (int)Math.Round(100 * part / whole, 0);
    return percent.ToString() + "%";
}
/// <summary>
/// Decodes a clears/plays counter whose digits are encoded in attribute values and adds the
/// clear rate.
/// </summary>
/// <remarks>
/// Every attribute value of every child node has the prefix <c>typography typography-</c>
/// removed. Remainders of at most one character are appended to the current number; the
/// remainder <c>slash</c> switches from the first number to the second.
/// </remarks>
/// <param name="item">Node whose children carry the encoded characters.</param>
/// <returns>
/// Three strings: the first number, the second number, and first divided by second formatted
/// with the <c>"P"</c> (percent) format.
/// </returns>
private string[] GetValueFromClears(HtmlAgilityPack.HtmlNode item)
{
    string[] parts = { "", "", "" };
    int slot = 0;

    foreach (var child in item.ChildNodes)
    {
        foreach (var attribute in child.Attributes)
        {
            string token = attribute.Value.Replace("typography typography-", "");

            if (token.Length <= 1)
            {
                // A single character (or nothing): part of the number being built.
                parts[slot] += token;
                continue;
            }

            if (token == "slash")
            {
                // Separator reached: following characters belong to the second number.
                slot = 1;
            }
        }
    }

    float clears = (float)int.Parse(parts[0]);
    float plays = (float)int.Parse(parts[1]);
    parts[2] = (clears / plays).ToString("P");

    return parts;
}
/// <summary>
/// Creates an <c>ActionUrlNode</c> from an existing node, reusing its node type, owner document
/// and name, and starting with an empty route-value map.
/// </summary>
/// <param name="htmlNode">Source node to mirror.</param>
public ActionUrlNode(HtmlNode htmlNode)
    : base(htmlNode.NodeType, htmlNode.OwnerDocument, -1)
{
    // Adopt the source node's name before copying from it.
    this.Name = htmlNode.Name;

    // Second argument is false — presumably a non-recursive copy; confirm against CopyFrom.
    CopyFrom(htmlNode, false);

    this.RouteValues = new Dictionary<string, RouteValueDictionary>();
}
/// <summary>
/// Returns the outer HTML of <paramref name="HTMLContent"/> with every tab, line-feed and
/// carriage-return character removed.
/// </summary>
/// <param name="HTMLContent">Node whose <c>OuterHtml</c> is flattened.</param>
/// <returns>The markup as a single line without tab characters.</returns>
public static string ProcessPageContentToString(HtmlAgilityPack.HtmlNode HTMLContent)
{
    string markup = HTMLContent.OuterHtml;

    // Strip tabs and both newline characters in one pass.
    return Regex.Replace(markup, @"\t|\n|\r", string.Empty);
}
/// <summary>
/// Turns one table row into a <c>Book</c> entry and adds it to <c>items</c>, unless the row's
/// website name contains one of the two filter strings or an entry for that website already exists.
/// </summary>
/// <param name="tr">Row whose first three <c>td</c> cells each contain an <c>a</c> element.</param>
private void ParseChapterRow(HtmlNode tr)
{
    var item = new Book();

    // Cell 0: chapter link, cell 1: website link, cell 2: link to the website's index page.
    var chapterCell = HtmlParseHelper.GetSingleDirectChildByType(tr, "td");
    var chapterLink = HtmlParseHelper.GetSingleDirectChildByType(chapterCell, "a");
    var siteCell = HtmlParseHelper.GetSingleDirectChildByTypeAndIndex(tr, "td", 1);
    var siteLink = HtmlParseHelper.GetSingleDirectChildByType(siteCell, "a");
    var indexCell = HtmlParseHelper.GetSingleDirectChildByTypeAndIndex(tr, "td", 2);
    var indexLink = HtmlParseHelper.GetSingleDirectChildByType(indexCell, "a");

    item.LastUpdateTime = DateTime.Now;
    item.Name = metaData.Name.Trim();

    // The href is site-relative, so it is prefixed with the host to form an absolute URI.
    var indexHref = indexLink.Attributes["href"].Value;
    item.IndexPage = new Uri("http://www.xiaoelang.com" + indexHref, UriKind.Absolute);

    item.LastestUpdateChapterName = chapterLink.InnerText.Trim();
    item.WebSite = new WebSite();
    item.WebSite.WebSiteName = siteLink.InnerText.Trim();

    var siteName = item.WebSite.WebSiteName;
    bool isFiltered = siteName.Contains(websiteFilter1) || siteName.Contains(websiteFilter2);
    if (isFiltered)
    {
        return;
    }

    // Keep only the first entry seen for each website.
    bool alreadyListed = items.Any(existing => existing.WebSite.WebSiteName == siteName);
    if (!alreadyListed)
    {
        items.Add(item);
    }
}
/// <summary>
/// Concatenates the de-entitized, trimmed inner text of every sibling that precedes
/// <paramref name="node"/>, nearest sibling first, separated by single spaces.
/// </summary>
/// <param name="node">Starting node; when null an empty string is returned.</param>
/// <param name="parentNode">
/// Node that is skipped when encountered among the siblings. The walk does not stop there.
/// </param>
/// <param name="foundParent">
/// Set to true when a visited sibling reference equals <paramref name="parentNode"/>; this
/// includes the terminating null when <paramref name="parentNode"/> is itself null.
/// It is never reset to false here.
/// </param>
/// <returns>The collected text, trimmed.</returns>
public string GetTextFromSiblings(HtmlAgilityPack.HtmlNode node, HtmlAgilityPack.HtmlNode parentNode, ref bool foundParent)
{
    var collected = new StringBuilder();
    if (node == null)
    {
        return collected.ToString().Trim();
    }

    for (HtmlAgilityPack.HtmlNode current = node.PreviousSibling; ; current = current.PreviousSibling)
    {
        if (current == parentNode)
        {
            foundParent = true;
        }

        // Reached the start of the sibling chain.
        if (current == null)
        {
            break;
        }

        if (current == parentNode)
        {
            continue;
        }

        var rawText = current.InnerText;
        if (string.IsNullOrWhiteSpace(rawText))
        {
            continue;
        }

        collected.Append(HtmlEntity.DeEntitize(rawText).Trim()).Append(" ");
    }

    return collected.ToString().Trim();
}
/// <summary>
/// Populates <c>url</c>, <c>title</c> and <c>imageUrl</c> from a video entry node.
/// </summary>
/// <remarks>
/// <c>url</c> is only set when the first link's href contains an <c>&amp;</c> after its first
/// character; everything from that <c>&amp;</c> on is dropped and the YouTube host is prepended.
/// <c>imageUrl</c> gets an <c>http:</c> prefix unless the image source already starts with
/// <c>http://</c> or <c>https://</c>.
/// </remarks>
/// <param name="node">Node containing the entry's link, title span and thumbnail image.</param>
public YoutubeVideoEntry(HtmlNode node)
{
    // SelectSingleNode returns the first match or null, replacing SelectNodes + FirstOrDefault.
    var linkNode = node.SelectSingleNode(".//a[@href]");
    if (linkNode != null)
    {
        var href = linkNode.Attributes["href"].Value;
        var splitIndex = href.IndexOf('&');
        if (splitIndex > 0)
        {
            url = "http://www.youtube.com" + href.Substring(0, splitIndex);
        }
    }

    var titleNode = node.SelectSingleNode(".//span[contains(@class, 'video-title')]");
    if (titleNode != null)
    {
        title = titleNode.InnerText;
    }

    if (!String.IsNullOrEmpty(title))
    {
        title = title.Trim();
    }

    var imageNode = node.SelectSingleNode(".//img[@src]");
    if (imageNode != null)
    {
        var src = imageNode.Attributes["src"].Value;

        // Blindly prepending "http:" turned an absolute source into "http:http://...".
        bool hasScheme = src.StartsWith("http://", StringComparison.OrdinalIgnoreCase)
            || src.StartsWith("https://", StringComparison.OrdinalIgnoreCase);
        imageUrl = hasScheme ? src : "http:" + src;
    }
}
/// <summary>
/// Returns the text of the row's <c>name</c> cell, followed by <c>?</c> when the row's
/// description cell contains a <c>span</c> with class <c>optional</c>.
/// </summary>
/// <param name="row">Row node to read; it must contain a <c>td</c> whose class is exactly <c>name</c>.</param>
/// <returns>The variable name, with a trailing <c>?</c> for optional variables.</returns>
private string VariableNameResolver(HtmlNode row)
{
    HtmlNode optionalMarker = row.SelectSingleNode(@".//td[contains(@class,'description')]/span[@class='optional']");
    string name = row.SelectSingleNode(@".//td[@class=""name"" ]").InnerText;

    string suffix;
    if (optionalMarker != null)
    {
        suffix = "?";
    }
    else
    {
        suffix = "";
    }

    return name + suffix;
}
/// <summary>
/// Builds a <c>Gear</c> from a gear node using the node getters supplied by
/// <paramref name="config"/>: the two image URIs, the main power icon and up to three sub-power icons.
/// </summary>
/// <param name="gearNode">Node describing one piece of gear.</param>
/// <param name="config">Supplies the getters that locate each part inside the node.</param>
/// <returns>The populated gear; sub-power 2 and 3 may be left unset.</returns>
private static Gear ParseGear(HtmlNode gearNode, ProfileParseConfig config)
{
    var gear = new Gear();

    // The getters take a sequence of nodes, so wrap the single node.
    var gearNodes = new[] { gearNode };

    var imageUris = ParseImage(config.GearImageGetter(gearNodes).Single(), config);
    gear.ImageUri = imageUris.Item1;
    gear.RetinaImageUri = imageUris.Item2;

    gear.GearPowerMainSvgUri = ParseImageUriFromStyle(config.GearPowerMainSvgGetter(gearNodes).Single(), config);

    var subPowerNodes = new[] { config.GearPowerSubGetter(gearNodes).Single() };
    gear.GearPowerSub1SvgUri = ParseImageUriFromStyle(config.GearPowerSub1Getter(subPowerNodes).Single(), config);

    try
    {
        gear.GearPowerSub2SvgUri = ParseImageUriFromStyle(config.GearPowerSub2Getter(subPowerNodes).Single(), config);
        gear.GearPowerSub3SvgUri = ParseImageUriFromStyle(config.GearPowerSub3Getter(subPowerNodes).Single(), config);
    }
    catch (ArgumentOutOfRangeException)
    {
        // Best effort: an out-of-range failure while reading sub-power 2 or 3 is ignored and
        // whatever was assigned so far is kept. Presumably these slots can be absent — confirm.
    }

    return gear;
}
/// <summary>
/// Parses an HTML message body, stores the located nodes on this instance and returns the
/// extracted fields as a 13-element row.
/// </summary>
/// <param name="subject">Message subject; stored on the instance.</param>
/// <param name="body">HTML body to load and query.</param>
/// <returns>
/// Row layout: [0] the integer 0, [1] message IP, [2] origin, [3] product, [4] name, [5] mail,
/// [6] country, [7] telephone, [8] company, [9] address, [10] fax, [11] and [12] empty strings.
/// </returns>
public object[] Parse(string subject, string body)
{
    this.subject = subject;

    var document = new HtmlDocument();
    document.LoadHtml(body);
    var root = document.DocumentNode;

    this.messageInfo = root.SelectSingleNode(@"//body/table/tr[2]/td/div/div[2]");
    if (this.messageInfo != null)
    {
        // Flatten the message text: drop tabs, turn CRLF line breaks into spaces.
        // messageText is left untouched when the node is missing.
        messageText = this.messageInfo.InnerText.Replace("\t", "").Replace("\r\n", " ");
    }

    this.productInfo = root.SelectSingleNode(@"//body/table/tr[2]/td/table/tr[2]/td[2]/a/strong");
    this.cantactInfo = root.SelectSingleNode(@"//body/table/tr[2]/td/table[3]/tr/td");

    // Element expressions are evaluated in order, so the getters run in the same sequence as before.
    return new object[]
    {
        0,
        GetMsgIp(),
        GetOrigin(),
        GetProduct(),
        GetName(),
        GetMail(),
        GetCountry(),
        GetTelephone(),
        GetCompany(),
        GetAddress(),
        GetFax(),
        string.Empty,
        string.Empty
    };
}
/// <summary>
/// Walks the tree depth-first, accumulating text length in <paramref name="currentCount"/>, and
/// returns the stream position at which the accumulated length reaches <paramref name="maxCount"/>.
/// </summary>
/// <param name="currentNode">Node to examine together with its descendants.</param>
/// <param name="currentCount">Running total of text characters seen so far; updated in place.</param>
/// <param name="maxCount">Character limit to locate.</param>
/// <returns>
/// The text node's <c>StreamPosition</c> plus the number of characters still allowed in it,
/// or -1 when the limit is not reached within this subtree.
/// </returns>
protected virtual int FindLimitIndex(HtmlNode currentNode, ref int currentCount, int maxCount)
{
    if (currentNode.NodeType == HtmlNodeType.Text)
    {
        int countBefore = currentCount;
        currentCount = countBefore + currentNode.InnerText.Length;

        if (currentCount >= maxCount)
        {
            // Offset into this text node where the limit falls.
            return currentNode.StreamPosition + (maxCount - countBefore);
        }
    }

    if (!currentNode.HasChildNodes)
    {
        return -1;
    }

    foreach (HtmlNode child in currentNode.ChildNodes)
    {
        int hit = FindLimitIndex(child, ref currentCount, maxCount);
        if (hit != -1)
        {
            return hit;
        }
    }

    return -1;
}
/// <summary>
/// Finds the first direct child of <paramref name="node"/> whose attribute named
/// <paramref name="tag"/> has the value <paramref name="att"/>, and returns
/// <c>GetListNode</c> for that child.
/// </summary>
/// <param name="node">Parent whose direct children are searched.</param>
/// <param name="tag">Attribute name to look up on each child (despite the parameter name).</param>
/// <param name="att">Attribute value a child must have to be selected.</param>
/// <param name="remove_text">Forwarded unchanged to <c>GetListNode</c>.</param>
/// <returns>The result of <c>GetListNode</c> for the selected child.</returns>
public static List<HtmlNode> GetListNodeToTag(HtmlNode node, string tag, string att, bool remove_text = false)
{
    // Step into the matching child node; indexing [0] throws when no child matches.
    List<HtmlNode> matches = node.ChildNodes
        .Where(child => child.GetAttributeValue(tag, "") == att)
        .ToList();
    HtmlNode target = matches[0];

    return GetListNode(target, remove_text);
}
/// <summary>
/// Extracts url, id, title, publication date and comment count from one article headline block.
/// </summary>
/// <param name="articleDiv">Node containing an <c>h3/a</c> title link and a <c>headline-date</c> div.</param>
/// <returns>The populated <c>ArticleInfo</c>.</returns>
/// <exception cref="CommonParsingException">
/// Thrown when the url contains <c>/video/</c>, or when the <c>id</c> query parameter converts to 0.
/// </exception>
private static ArticleInfo ParseArticleInfoDiv(HtmlNode articleDiv)
{
    var titleLink = articleDiv.SelectSingleNode("h3/a");
    var dateNode = articleDiv.SelectSingleNode("div[@class='headline-date']");
    var commentLink = articleDiv.SelectSingleNode("h3/a[@class='commentCount']");

    var info = new ArticleInfo();
    info.Url = titleLink.Attributes["href"].Value;

    if (info.Url.Contains(@"/video/"))
    {
        throw new CommonParsingException("Delfi TV article");
    }

    info.Id.ExternalId = info.Url.GetQueryParameterValueFromUrl("id");
    info.Title = titleLink.InnerText;
    info.DatePublished = DelfiWordyDateParser.Parse(dateNode.InnerText);
    // NOTE(review): fixed +2h offset from UTC — looks like a hard-coded local time zone; confirm.
    info.DateScraped = DateTime.UtcNow.AddHours(2);
    info.Id.Portal = Portal.Delfi;

    // The comment link text is wrapped in parentheses, e.g. "(12)"; no link means no comments.
    int commentCount = 0;
    if (commentLink != null)
    {
        commentCount = Convert.ToInt32(commentLink.InnerText.TrimStart('(').TrimEnd(')'));
    }
    info.CommentCount = commentCount;

    // Validate the id last: a value that converts to 0 is treated as "no id".
    if (Convert.ToInt32(info.Url.GetQueryParameterValueFromUrl("id")) == 0)
    {
        throw new CommonParsingException("Article id not found");
    }

    return info;
}
/// <summary>
/// Downloads the page at <paramref name="address"/> and fills <paramref name="hastcIdx"/> from the
/// elements with ids <c>IDX</c> (index value) and <c>QTY</c> (total quantity).
/// </summary>
/// <param name="address">URL of the page to download.</param>
/// <param name="hastcIdx">
/// Receives <c>Value</c> and <c>TotalQty</c> when the matching elements exist; <c>TotalAmt</c> is
/// always set to 0.
/// </param>
/// <returns>True when both elements were found; false when either one is missing.</returns>
static bool Get_IDX_HASTC(string address, ref MarketData hastcIdx)
{
    bool retVal = true;
    CultureInfo dataCulture = common.language.GetCulture("en-US");

    string htmlContent;
    HttpWebRequest wRequest = HttpWebRequest.Create(new Uri(address)) as HttpWebRequest;

    // Dispose the response and reader deterministically; they were previously left open,
    // which holds the underlying connection until finalization.
    using (HttpWebResponse wResponse = wRequest.GetResponse() as HttpWebResponse)
    using (StreamReader reader = new StreamReader(wResponse.GetResponseStream()))
    {
        htmlContent = reader.ReadToEnd();
    }

    HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
    doc.LoadHtml(htmlContent);

    HtmlAgilityPack.HtmlNode nodeHNXIndex = doc.GetElementbyId("IDX");
    HtmlAgilityPack.HtmlNode nodeTongKL = doc.GetElementbyId("QTY");

    if (nodeHNXIndex != null)
    {
        hastcIdx.Value = decimal.Parse(nodeHNXIndex.InnerHtml, dataCulture);
    }
    else
    {
        retVal = false;
    }

    if (nodeTongKL != null)
    {
        hastcIdx.TotalQty = decimal.Parse(nodeTongKL.InnerHtml, dataCulture);
    }
    else
    {
        retVal = false;
    }

    hastcIdx.TotalAmt = 0;
    return retVal;
}