/// <summary>
/// Reads the delivery place and delivery term from the table rows that follow the lot row and,
/// when at least one of them is non-empty, inserts them as a customer requirement for the lot.
/// </summary>
/// <param name="lotNav">Navigator positioned on the lot row; both XPath queries are relative to it.</param>
/// <param name="connect">Connection the INSERT is executed on. NOTE(review): presumably already open — confirm with the caller.</param>
/// <param name="idLot">Value stored in the <c>id_lot</c> column.</param>
/// <param name="customerId">Value stored in the <c>id_customer</c> column.</param>
private void GetCusRequirements(HtmlNodeNavigator lotNav, MySqlConnection connect, int idLot, int customerId)
{
    var delivPlace = (lotNav
        .SelectSingleNode(
            "./following-sibling::tr/th[contains(text(), 'Место поставки товара, выполнения работ, оказания услуг')]/following-sibling::td/div")
        ?.Value ?? "").ReplaceHtmlEntyty().Trim();
    var delivTerm = (lotNav
        .SelectSingleNode(
            "./following-sibling::tr/th[contains(text(), 'Срок поставки')]/following-sibling::td")
        ?.Value ?? "").Trim();

    // Nothing to store when neither value was found on the page.
    if (string.IsNullOrEmpty(delivTerm) && string.IsNullOrEmpty(delivPlace))
    {
        return;
    }

    var insertCustomerRequirement =
        $"INSERT INTO {AppBuilder.Prefix}customer_requirement SET id_lot = @id_lot, id_customer = @id_customer, delivery_place = @delivery_place, max_price = @max_price, delivery_term = @delivery_term";

    // The command is disposable; release it deterministically instead of leaving it to the finalizer.
    using (var cmd16 = new MySqlCommand(insertCustomerRequirement, connect))
    {
        cmd16.Prepare();
        cmd16.Parameters.AddWithValue("@id_lot", idLot);
        cmd16.Parameters.AddWithValue("@id_customer", customerId);
        cmd16.Parameters.AddWithValue("@delivery_place", delivPlace);
        cmd16.Parameters.AddWithValue("@max_price", "");
        cmd16.Parameters.AddWithValue("@delivery_term", delivTerm);
        cmd16.ExecuteNonQuery();
    }
}
/// <summary>
/// Concatenates the inner text of every descendant text node of the current node,
/// separated by single spaces. When no text was collected, falls back to the navigator's value.
/// </summary>
/// <param name="settings">Not used by this transformation.</param>
/// <param name="nodeNavigator">Navigator whose current node is scanned; may be null.</param>
/// <param name="logicalParents">Not used by this transformation.</param>
/// <returns>The joined text, the navigator value as a fallback, or an empty string when neither is available.</returns>
public object Transform(Dictionary<string, object> settings, HtmlNodeNavigator nodeNavigator, List<HtmlAgilityPack.HtmlNode> logicalParents)
{
    var ret = new StringBuilder();

    // HtmlAgilityPack's SelectNodes returns null (not an empty collection) when nothing matches,
    // so the result must be checked before it is enumerated.
    var textNodes = nodeNavigator?.CurrentNode?.SelectNodes(".//text()");
    if (textNodes != null)
    {
        foreach (HtmlNode node in textNodes)
        {
            var nodeInnerText = node.InnerText;
            if (nodeInnerText == null)
            {
                continue;
            }

            if (ret.Length > 0)
            {
                ret.Append(" ");
            }

            ret.Append(nodeInnerText);
        }
    }

    if (ret.Length == 0)
    {
        var navVal = nodeNavigator?.Value;
        if (navVal != null)
        {
            return navVal;
        }
    }

    return ret.ToString();
}
/// <summary>
/// Inserts one purchase object row per text node found in the second cell of the lot row.
/// The classifier code read from the rows following the lot row is stored with every item.
/// </summary>
/// <param name="lotNav">Navigator positioned on the lot row; the XPath queries are relative to it.</param>
/// <param name="connect">Connection the INSERTs are executed on. NOTE(review): presumably already open — confirm with the caller.</param>
/// <param name="idLot">Value stored in the <c>id_lot</c> column.</param>
/// <param name="customerId">Value stored in the <c>id_customer</c> column.</param>
private void GetPurchaseObjects(HtmlNodeNavigator lotNav, MySqlConnection connect, int idLot, int customerId)
{
    // The original predicate read "th[.contains(...)]", which is not valid XPath 1.0
    // (a function call cannot follow the "." step); the form below matches the sibling queries.
    var okpd2 = (lotNav
        .SelectSingleNode(
            "./following-sibling::tr/th[contains(text(), 'Код ОКРБ')]/following-sibling::td")
        ?.Value ?? "").Trim();
    var purObjects = lotNav.Select(
        "./td[2]/text()");
    if (purObjects is null)
    {
        return;
    }

    // The statement text does not depend on the item, so build it once.
    var insertLotitem =
        $"INSERT INTO {AppBuilder.Prefix}purchase_object SET id_lot = @id_lot, id_customer = @id_customer, name = @name, quantity_value = @quantity_value, okei = @okei, customer_quantity_value = @customer_quantity_value, price = @price, sum = @sum, okpd2_code = @okpd2_code";

    foreach (XPathNavigator po in purObjects)
    {
        var namePo = po?.Value?.ReplaceHtmlEntyty()?.Trim() ?? "";

        // Dispose each command as soon as it has been executed.
        using (var cmd19 = new MySqlCommand(insertLotitem, connect))
        {
            cmd19.Prepare();
            cmd19.Parameters.AddWithValue("@id_lot", idLot);
            cmd19.Parameters.AddWithValue("@id_customer", customerId);
            cmd19.Parameters.AddWithValue("@name", namePo);
            cmd19.Parameters.AddWithValue("@quantity_value", "");
            cmd19.Parameters.AddWithValue("@okei", "");
            cmd19.Parameters.AddWithValue("@customer_quantity_value", "");
            cmd19.Parameters.AddWithValue("@price", "");
            cmd19.Parameters.AddWithValue("@sum", "");
            cmd19.Parameters.AddWithValue("@okpd2_code", okpd2);
            cmd19.ExecuteNonQuery();
        }
    }
}
/// <summary>
/// Builds a new product, its current price record and its picture records from the page
/// and appends all of them to <paramref name="res"/>.
/// </summary>
/// <param name="navigator">Navigator over the product page.</param>
/// <param name="product">Overwritten locally with the newly built product; the caller's reference is not affected.</param>
/// <param name="res">Result container receiving the new pictures, price and product.</param>
/// <param name="link">Link stored on the new product.</param>
private void ParseNewProduct(HtmlNodeNavigator navigator, Models.Product product, ParsingResults res, string link)
{
    string name = ConnectStrings(navigator.Select(Rules_.HeaderXpath));
    string description = ConnectStrings(navigator.Select(Rules_.DescriptionXpath));
    string priceText = ConnectStrings(navigator.Select(Rules_.PriceXpath));
    List<string> pictureUrls = ListValue(navigator.Select(Rules_.PicturesXpath));

    // The first picture (if any) doubles as the product's main image.
    string mainImage = "";
    if (pictureUrls.Count > 0)
    {
        mainImage = pictureUrls[0];
    }

    product = new Models.Product()
    {
        Description = description,
        Name = name,
        Link = link,
        ImageUrl = mainImage,
        Price = priceText
    };

    Models.Price currentPrice = new Models.Price()
    {
        Product = product,
        Date = DateTime.Now,
        PriceValue = priceText
    };

    Models.Product owner = product;
    List<Models.Picture> pictureRecords = pictureUrls.ConvertAll(
        pictureUrl => new Models.Picture() { Product = owner, PictureUrl = pictureUrl });

    res.AddedPictures.AddRange(pictureRecords);
    res.AddedPrices.Add(currentPrice);
    res.AddedProducts.Add(product);
}
/// <summary>
/// Converts an XPath evaluation result to a string.
/// </summary>
/// <param name="argument">
/// The value to convert: null, a string, an <see cref="XPathNodeIterator"/>, any other
/// <see cref="IEnumerable"/> (converted element by element), or an arbitrary object.
/// </param>
/// <param name="outer">For node iterators: true to emit each node's outer HTML, false for its inner HTML.</param>
/// <param name="separator">Inserted between consecutive items; null means no separator.</param>
/// <returns>
/// Null for a null argument, an exhausted iterator or an empty enumerable; otherwise the string form.
/// </returns>
public static string ConvertToString(object argument, bool outer, string separator)
{
    if (argument == null)
    {
        return null;
    }

    if (argument is string)
    {
        return (string)argument;
    }

    if (argument is XPathNodeIterator)
    {
        XPathNodeIterator nodes = (XPathNodeIterator)argument;
        if (!nodes.MoveNext())
        {
            return null;
        }

        StringBuilder html = new StringBuilder();
        // The iterator is already positioned on the first node, so process before advancing.
        for (bool hasNode = true; hasNode; hasNode = nodes.MoveNext())
        {
            HtmlNodeNavigator nav = nodes.Current as HtmlNodeNavigator;
            if (nav == null || nav.CurrentNode == null)
            {
                continue;
            }

            if (html.Length > 0 && separator != null)
            {
                html.Append(separator);
            }

            html.Append(outer ? nav.CurrentNode.OuterHtml : nav.CurrentNode.InnerHtml);
        }

        return html.ToString();
    }

    if (argument is IEnumerable)
    {
        // The builder is created lazily so that an empty sequence yields null rather than "".
        StringBuilder joined = null;
        foreach (object item in (IEnumerable)argument)
        {
            if (joined == null)
            {
                joined = new StringBuilder();
            }

            if (joined.Length > 0 && separator != null)
            {
                joined.Append(separator);
            }

            string itemText = ConvertToString(item, outer, separator);
            if (itemText != null)
            {
                joined.Append(itemText);
            }
        }

        return joined == null ? null : joined.ToString();
    }

    return string.Format("{0}", argument);
}
/// <summary>
/// Rebuilds <c>Names</c> and <c>Ids</c> from the versions table of the given HTML page.
/// Two page layouts are tried; an error is reported when neither yields any rows.
/// </summary>
/// <param name="html">Markup of the page containing the versions table.</param>
/// <remarks>
/// The method contains no awaits; it is kept <c>async</c> so that exceptions keep being
/// delivered through the returned task, as callers may expect.
/// </remarks>
private async Task PopulateVersions(string html)
{
    var doc = new HtmlDocument();
    doc.LoadHtml(html);
    Names.Clear();
    Ids.Clear();

    var nav = doc.CreateNavigator();
    var res = nav.Select("/html/body/div[1]/main/div[1]/div[2]/section/div/div/div/section/div[2]/div[2]/div/div/table/tbody/tr[*]/td[2]/a");
    var res2 = nav.Select("/html/body/div[1]/main/div[1]/div[2]/section/div/div/div/section/div[2]/div[2]/div/div/table/tbody/tr[*]/td[5]/div/div");
    if (res.Count == 0)
    {
        // Alternative layout: the table lives in the third div of the section.
        res = nav.Select("/html/body/div[1]/main/div[1]/div[2]/section/div/div/div/section/div[3]/div[2]/div/div/table/tbody/tr[*]/td[2]/a[1]");
        res2 = nav.Select("/html/body/div[1]/main/div[1]/div[2]/section/div/div/div/section/div[3]/div[2]/div/div/table/tbody/tr[*]/td[5]/div/div");
    }

    if (res.Count == 0)
    {
        Global.ShowError("Failed to load versions...");
        return;
    }

    foreach (HtmlNodeNavigator node in res)
    {
        var urlsplit = node.CurrentNode.GetAttributeValue("href", "").Split('/');

        // Only use the version cell when the second iterator really advanced; otherwise
        // Current would still point at a previous (or no) cell and be paired with the wrong row.
        var versionNode = res2.MoveNext() ? res2.Current as HtmlNodeNavigator : null;
        var versionText = versionNode != null
            ? versionNode.CurrentNode.InnerText.Replace("\r\n", "").Trim()
            : "";

        Names.Add(node.CurrentNode.InnerText + " - " + versionText);
        Ids.Add(urlsplit[urlsplit.Length - 1]);
    }
}
/// <summary>
/// Parses <c>htmlText</c> and evaluates <c>HtmlPath</c> against it, storing the match in
/// <c>ResultNode</c> and its string form in <c>ResultString</c>.
/// </summary>
/// <returns>
/// True when the path matched a node; false when the path is not a valid XPath expression
/// or nothing matched.
/// </returns>
protected bool RunHtmlPathSearch()
{
    HtmlDocument document = new HtmlDocument();
    document.LoadHtml(htmlText);
    var navigator = (HtmlNodeNavigator)document.CreateNavigator();

    // Matching on an attribute that contains a certain string:
    // https://stackoverflow.com/questions/1390568/how-can-i-match-on-an-attribute-that-contains-a-certain-string
    // Sample htmlPath to get a download link: @"//a[contains(@class, 'ModDetails_hidden')]//@href"
    Logging.Debug(LogOptions.ClassName, "Searching using html path: {0}", HtmlPath);

    try
    {
        ResultNode = navigator.SelectSingleNode(HtmlPath) as HtmlNodeNavigator;
    }
    catch (XPathException ex)
    {
        Logging.Exception(ex.ToString());
        return false;
    }

    if (ResultNode == null)
    {
        Logging.Info(LogOptions.ClassName, "Result was not found");
        return false;
    }

    Logging.Info(LogOptions.ClassName, "HtmlPath results in node value '{0}' of type '{1}'", ResultNode.InnerXml, ResultNode.NodeType.ToString());
    Logging.Info(LogOptions.ClassName, "Result value as text: {0}\nResult inner html: {1}\nResult outer html: {2}", ResultNode.Value, ResultNode.InnerXml, ResultNode.OuterXml);
    ResultString = ResultNode.ToString();
    return true;
}
/// <summary>
/// Resolves the organizer id for <c>_tn.OrgName</c>: returns the id of an existing organizer row
/// with that full name, or inserts a new row populated with contact details scraped from the page.
/// </summary>
/// <param name="connect">Connection the queries are executed on. NOTE(review): presumably already open — confirm with the caller.</param>
/// <param name="navigator">Navigator over the tender page the contact details are read from.</param>
/// <param name="organiserId">Receives the organizer id, or 0 when <c>_tn.OrgName</c> is empty.</param>
private void AddOrganizer(MySqlConnection connect, HtmlNodeNavigator navigator, out int organiserId)
{
    organiserId = 0;
    if (string.IsNullOrEmpty(_tn.OrgName))
    {
        return;
    }

    var selectOrg = $"SELECT id_organizer FROM {AppBuilder.Prefix}organizer WHERE full_name = @full_name";

    // Command, adapter and table are all disposable; release them once the lookup is done.
    using (var cmd3 = new MySqlCommand(selectOrg, connect))
    {
        cmd3.Prepare();
        cmd3.Parameters.AddWithValue("@full_name", _tn.OrgName);
        using (var dt3 = new DataTable())
        using (var adapter3 = new MySqlDataAdapter { SelectCommand = cmd3 })
        {
            adapter3.Fill(dt3);
            if (dt3.Rows.Count > 0)
            {
                organiserId = (int)dt3.Rows[0].ItemArray[0];
                return;
            }
        }
    }

    // The organizer is unknown: collect the contact details from the page and insert a new row.
    var phone = navigator.SelectSingleNode(
            "//td[contains(., 'Номер контактного телефона заказчика')]/following-sibling::td")
        ?.Value?.Trim() ?? "";
    var email = navigator.SelectSingleNode(
            "//td[contains(., 'Контактный адрес e-mail:')]/following-sibling::td")
        ?.Value?.Trim() ?? "";
    var contactPerson = navigator.SelectSingleNode(
            "//td[contains(., 'Контактное лицо:') or contains(., 'Ответственное лицо:')]/following-sibling::td")
        ?.Value?.Trim() ?? "";
    var postAddr = navigator.SelectSingleNode(
            "//td[contains(., 'Почтовый адрес заказчика:')]/following-sibling::td")
        ?.Value?.Trim() ?? "";
    var address = navigator.SelectSingleNode(
            "//td[contains(., 'Местонахождение заказчика:')]/following-sibling::td")
        ?.Value?.Trim() ?? "";

    var addOrganizer =
        $"INSERT INTO {AppBuilder.Prefix}organizer SET full_name = @full_name, contact_phone = @contact_phone, contact_person = @contact_person, contact_email = @contact_email, post_address = @post_address, fact_address = @fact_address";
    using (var cmd4 = new MySqlCommand(addOrganizer, connect))
    {
        cmd4.Prepare();
        cmd4.Parameters.AddWithValue("@full_name", _tn.OrgName);
        cmd4.Parameters.AddWithValue("@contact_phone", phone);
        cmd4.Parameters.AddWithValue("@contact_person", contactPerson);
        cmd4.Parameters.AddWithValue("@contact_email", email);
        cmd4.Parameters.AddWithValue("@post_address", postAddr);
        cmd4.Parameters.AddWithValue("@fact_address", address);
        cmd4.ExecuteNonQuery();
        organiserId = (int)cmd4.LastInsertedId;
    }
}
/// <summary>
/// Fills <c>_tn.PurName</c> when it is still empty, using <c>_tn.FullPw</c> followed by the
/// trimmed text of the first <c>div</c> with class <c>s2</c> on the page.
/// </summary>
/// <param name="navigator">Navigator over the tender page.</param>
private void FillPurName(HtmlNodeNavigator navigator)
{
    // An already known purchase name is left untouched.
    if (!string.IsNullOrEmpty(_tn.PurName))
    {
        return;
    }

    var nameNode = navigator.SelectSingleNode(
        "//div[@class = 's2']");
    var firstPurName = nameNode?.Value?.Trim() ?? "";
    _tn.PurName = $"{_tn.FullPw} {firstPurName}";
}
/// <summary>
/// Parses the text of the current node as a date via <c>ParseDate</c>.
/// </summary>
/// <param name="settings">Passed through to <c>ParseDate</c>.</param>
/// <param name="nodeNavigator">Source of the raw text: its value, or the current node's inner text when the value is null.</param>
/// <param name="logicalParents">Not used by this transformation.</param>
/// <returns>The result of <c>ParseDate</c>, or null when no raw text is available.</returns>
public object Transform(Dictionary<string, object> settings, HtmlNodeNavigator nodeNavigator, List<HtmlAgilityPack.HtmlNode> logicalParents)
{
    var rawDate = nodeNavigator?.Value;
    if (rawDate == null)
    {
        rawDate = nodeNavigator?.CurrentNode?.InnerText;
    }

    if (rawDate == null)
    {
        return null;
    }

    return ParseDate(settings, rawDate);
}
/// <summary>
/// Looks for a short piece of text in the siblings that precede the current node and
/// returns it as a title candidate.
/// </summary>
/// <param name="settings">
/// Optional settings: integer <c>_maxStepsUpward</c> (default 3) limits how many counted siblings are
/// visited, integer <c>_maxTitleLength</c> (default 200) is the longest accepted text. Values that
/// are missing or not integer JSON values are ignored.
/// </param>
/// <param name="nodeNavigator">Navigator whose current node is the starting point; may be null.</param>
/// <param name="logicalParents">Not used by this transformation.</param>
/// <returns>
/// The de-entitized, trimmed text of the first acceptable preceding sibling, or null when there is
/// no current node, no candidate is found, or the first non-empty candidate is too long.
/// </returns>
public object Transform(Dictionary<string, object> settings, HtmlNodeNavigator nodeNavigator, List<HtmlAgilityPack.HtmlNode> logicalParents)
{
    var node = nodeNavigator?.CurrentNode;
    if (node == null)
    {
        // Without a starting node there are no siblings to inspect; previously this path
        // dereferenced null.
        return null;
    }

    var maxLevel = 3;
    var maxTitleLength = 200;
    if (settings != null)
    {
        object rawSetting;
        if (settings.TryGetValue("_maxStepsUpward", out rawSetting))
        {
            var stepsSetting = rawSetting as JValue;
            if (stepsSetting != null && stepsSetting.Type == JTokenType.Integer)
            {
                maxLevel = stepsSetting.ToObject<int>();
            }
        }

        if (settings.TryGetValue("_maxTitleLength", out rawSetting))
        {
            var lengthSetting = rawSetting as JValue;
            if (lengthSetting != null && lengthSetting.Type == JTokenType.Integer)
            {
                maxTitleLength = lengthSetting.ToObject<int>();
            }
        }
    }

    HtmlAgilityPack.HtmlNode sibling = null;
    var level = 0;
    do
    {
        level++;
        // First pass starts from the node itself, later passes continue from the last sibling.
        sibling = sibling != null ? sibling.PreviousSibling : node.PreviousSibling;
        if (sibling != null && this.IsAllowedTypeRecursive(sibling))
        {
            var siblingInnerText = sibling.InnerText;
            if (!string.IsNullOrWhiteSpace(siblingInnerText))
            {
                var text = HtmlEntity.DeEntitize(siblingInnerText).Trim();
                if (text.Length <= maxTitleLength)
                {
                    return text;
                }
                else if (text.Length > 0)
                {
                    // We will stop if the first title candidate we find is not valid,
                    // but we will continue if the text was empty.
                    return null;
                }
            }
        }

        // At this point we did not return any text, so the text node is empty, or this is a comment.
        // Decrement the level so this node is ignored completely.
        if (sibling != null && (sibling.NodeType == HtmlAgilityPack.HtmlNodeType.Text || sibling.NodeType == HtmlAgilityPack.HtmlNodeType.Comment))
        {
            level--;
        }
    }
    while (sibling != null && level < maxLevel);

    return null;
}
/// <summary>
/// URL-decodes the text of the current node.
/// </summary>
/// <param name="settings">Not used by this transformation.</param>
/// <param name="nodeNavigator">Source of the text: its value, or the current node's inner text when the value is null.</param>
/// <param name="logicalParents">Not used by this transformation.</param>
/// <returns>The decoded text, or null when no text is available.</returns>
public object Transform(Dictionary<string, object> settings, HtmlNodeNavigator nodeNavigator, List<HtmlAgilityPack.HtmlNode> logicalParents)
{
    var encoded = nodeNavigator?.Value;
    if (encoded == null)
    {
        encoded = nodeNavigator?.CurrentNode?.InnerText;
    }

    if (encoded == null)
    {
        return null;
    }

    return WebUtility.UrlDecode(encoded);
}
/// <summary>
/// Downloads every attachment linked from the given page by posting the page's form
/// once per <c>&lt;a title="..."&gt;</c> element.
/// </summary>
/// <param name="htmlDocumentStream">Stream containing the HTML of the attachment page.</param>
/// <returns>
/// One entry per link: the key is <c>..\</c> followed by the link's <c>title</c> attribute, the value is
/// the response body stream. The list is empty when the page has no such links.
/// </returns>
public List<KeyValuePair<string, System.IO.Stream>> GetHttpFileAttachment(System.IO.Stream htmlDocumentStream)
{
    var navigator = new HtmlNodeNavigator(htmlDocumentStream);

    // get new view tokens
    this.TryGetTokens(navigator, out this.viewState, out this.eventValidation);

    var formAction = navigator.CurrentNode.SelectSingleNode(".//form[@name='attachForm']").GetAttributeValue("action", "");
    // The attribute may still carry HTML-escaped ampersands; the previous Replace("&", "&") was a no-op.
    formAction = formAction.Replace("&amp;", "&");
    var uriToFile = "http://www.bldgportal.com/RFI/Application/" + formAction;

    // all <a> elements with the "title" attribute
    var fileNodes = navigator.CurrentNode.SelectNodes(@".//a[@title]");
    var attachments = new List<KeyValuePair<string, System.IO.Stream>>();
    if (fileNodes == null)
    {
        return attachments;
    }

    foreach (var fileLink in fileNodes)
    {
        // The post-back target is the element id with '_' replaced by '$'.
        var target = fileLink.Id.Replace('_', '$');

        var postData = new List<KeyValuePair<string, string>>
        {
            new KeyValuePair<string, string>("__VIEWSTATE", viewState),
            new KeyValuePair<string, string>("__EVENTVALIDATION", eventValidation),
            new KeyValuePair<string, string>("__EVENTTARGET", target),
            new KeyValuePair<string, string>("__EVENTARGUMENT", ""),
            new KeyValuePair<string, string>("validFileTypes", "doc, docx, xls, xlsx, xlsm, dwg, dwgx, dwf, dwfx, bmp, gif, jpg, jpeg, tif, tiff, pdf, txt, rtf, ai"),
            new KeyValuePair<string, string>("fileID", "")
        };

        // The request content is no longer needed once the post has completed. The response is
        // deliberately NOT disposed here: the stream handed to the caller belongs to it.
        using (var content = new FormUrlEncodedContent(postData))
        {
            var response = client.PostAsync(uriToFile, content).Result;
            attachments.Add(new KeyValuePair<string, System.IO.Stream>(
                String.Format(@"..\{0}", fileLink.GetAttributeValue("title", "")),
                response.Content.ReadAsStreamAsync().Result));
        }
    }

    return attachments;
}
/// <summary>
/// Extracts the inner text of one field of an item, located by a relative XPath from the item's base node.
/// </summary>
/// <param name="BaseNode">Root node of a single item.</param>
/// <param name="RelXPath">Relative XPath of the field; when blank, the base node's own text is used.</param>
/// <param name="postion">Not used by this method.</param>
/// <param name="CleanConnectionMark">
/// True to clean the text with the default <c>TextCleaner.FullClean</c> options, false to use the
/// explicit option set passed below.
/// </param>
/// <returns>The cleaned text, or null when the base node is null or the XPath yields no node with text.</returns>
internal static string ExtractInnerTextFromBaseNode(HtmlNode BaseNode, string RelXPath, int postion, bool CleanConnectionMark = true)
{
    if (BaseNode == null)
    {
        return null;
    }

    string innerTextValue;
    if (string.IsNullOrWhiteSpace(RelXPath))
    {
        innerTextValue = XPathUtility.InnerTextNonDescendants(BaseNode);
    }
    else
    {
        innerTextValue = "";
        try
        {
            HtmlNodeNavigator navigator = (HtmlNodeNavigator)BaseNode.CreateNavigator();
            var node = navigator.SelectSingleNode(RelXPath);
            // "No match" is an expected outcome, so test for it instead of relying on a
            // NullReferenceException being swallowed below.
            if (node != null)
            {
                innerTextValue = node.Value;
            }
        }
        catch (Exception)
        {
            // Deliberate best effort: any failure here falls through to the SelectNodes lookup below.
        }

        if (string.IsNullOrWhiteSpace(innerTextValue))
        {
            IEnumerable<HtmlNode> matchNodes = BaseNode.SelectNodes(RelXPath);
            // Find the first match that has text of its own in a single pass.
            HtmlNode firstWithText = matchNodes == null
                ? null
                : matchNodes.FirstOrDefault(n => !string.IsNullOrEmpty(XPathUtility.InnerTextNonDescendants(n)));
            if (firstWithText == null)
            {
                return null;
            }

            innerTextValue = XPathUtility.InnerTextNonDescendants(firstWithText);
        }
    }

    if (CleanConnectionMark)
    {
        return TextCleaner.FullClean(innerTextValue);
    }

    return TextCleaner.FullClean(innerTextValue, true, true, true, false, true, false);
}
/// <summary>
/// Evaluates the XPath expression against the wrapped node and wraps every match.
/// </summary>
/// <param name="expr">Compiled XPath expression to evaluate.</param>
/// <returns>An array with one <c>HtmlNodeWrapper</c> per match, in iteration order.</returns>
public override INode[] SelectNodes(XPathExpression expr)
{
    var matches = Wrapped.CreateNavigator().Select(expr);
    var wrapped = new INode[matches.Count];

    // The slot index runs in step with the iterator's 1-based position.
    for (var slot = 0; matches.MoveNext(); slot++)
    {
        var current = (HtmlNodeNavigator)matches.Current;
        wrapped[slot] = new HtmlNodeWrapper(current);
    }

    return wrapped;
}
/// <summary>
/// Builds the notice version text from the comments cell and the documentation-procedure cell of the page.
/// </summary>
/// <param name="navigator">Navigator over the tender page.</param>
/// <param name="noticeVer">
/// Receives the comments, a line break, the fixed label and the documentation procedure, trimmed as a whole.
/// </param>
private void FillNoticeVer(HtmlNodeNavigator navigator, out string noticeVer)
{
    var commentsCell = navigator.SelectSingleNode(
        "//td[b[. = 'Комментарии:']]");
    var comments = commentsCell?.Value?.Trim() ?? "";

    var documentationCell = navigator.SelectSingleNode(
        "//td[contains(., 'Порядок предоставления документации по закупке:')]/following-sibling::td");
    var providingDocumentation = documentationCell?.Value?.Trim() ?? "";

    var combined = $"{comments}\nПорядок предоставления документации по закупке: {providingDocumentation}";
    noticeVer = combined.Trim();
}
/// <summary>
/// De-entitizes and trims the navigator's value, then passes it through <c>RemovePatternsFromText</c>.
/// </summary>
/// <param name="settings">Stored on the instance and loaded via <c>LoadSettings</c> before the text is processed.</param>
/// <param name="nodeNavigator">Source of the text; may be null.</param>
/// <param name="logicalParents">Not used by this transformation.</param>
/// <returns>The processed text, or null when the navigator has no value (settings are not touched in that case).</returns>
public object Transform(Dictionary<string, object> settings, HtmlNodeNavigator nodeNavigator, List<HtmlNode> logicalParents)
{
    string rawText = nodeNavigator?.Value;
    if (rawText == null)
    {
        return null;
    }

    this.settings = settings;
    LoadSettings();

    string cleaned = HtmlEntity.DeEntitize(rawText).Trim();
    return RemovePatternsFromText(cleaned);
}
/// <summary>
/// Posts the login form at <paramref name="uri"/>, loads the portal main page and extracts the
/// <c>?SID=...</c> token from the first <c>onclick</c> link into <c>GlobalEvocoId</c>.
/// Snapshots of the login and portal pages are saved to <c>..\login.html</c> and <c>..\portal.html</c>.
/// </summary>
/// <param name="uri">Address of the login page; the form is posted to the same address with <c>?TIMEOUT=true</c> appended.</param>
private void LogMeIn(string uri)
{
    var body = this.Navigate(uri);
    var navigator = new HtmlNodeNavigator(body);
    this.TryGetTokens(navigator, out this.viewState, out this.eventValidation);

    var postData = new List<KeyValuePair<string, string>>
    {
        new KeyValuePair<string, string>("__VIEWSTATE", viewState),
        new KeyValuePair<string, string>("__EVENTVALIDATION", eventValidation),
        new KeyValuePair<string, string>("UserNameTextBox", creds.Username),
        new KeyValuePair<string, string>("PasswordTextBox", creds.Password),
        new KeyValuePair<string, string>("ConfCheckBox", "on"),
        new KeyValuePair<string, string>("LoginButton", "Authenticate"),
        new KeyValuePair<string, string>("RequestPasswordTextBox", "")
    };

    // The response body is never read, so both the request content and the response
    // can be released as soon as the post has completed.
    using (var content = new FormUrlEncodedContent(postData))
    using (client.PostAsync(uri + "?TIMEOUT=true", content).Result)
    {
    }

    // NOTE(review): this re-parses the page fetched before the post, not the login response — confirm intent.
    navigator = new HtmlNodeNavigator(body);
    navigator.CurrentDocument.Save(@"..\login.html");

    var portalUri = @"https://www.bldgportal.com/Application/PortalMain.aspx";
    navigator = new HtmlNodeNavigator(this.Navigate(portalUri));
    navigator.CurrentDocument.Save(@"..\portal.html");

    // Moves the navigator to the element with this id; the result is not checked, so on failure
    // the search below starts from wherever the navigator currently is.
    navigator.MoveToId("UserIdTable");
    var links = navigator.CurrentNode.SelectNodes(@".//a[@onclick]");
    var nodeLin = links[0];

    // get and use the SID to log into the RFI subsystem
    var regex = new System.Text.RegularExpressions.Regex(@"(?<sid>\?SID=.*)'");
    var match = regex.Match(nodeLin.Attributes["onclick"].Value);
    this.GlobalEvocoId = match.Groups["sid"].Value;
}
/// <summary>
/// Builds the content tree out of <see cref="HtmlDocument"/>: one entry per element that has
/// text and no child elements, keyed by the element's XPath with '/' replaced by '\'.
/// </summary>
/// <param name="htmlDoc">The HTML document</param>
/// <param name="__name">The name of the root</param>
/// <param name="allowTitle">Forwarded to <c>isTagAcceptable</c>.</param>
/// <param name="allowMeta">Forwarded to <c>isTagAcceptable</c>.</param>
/// <returns>The tree holding a clone of every accepted element whose processed text is not empty.</returns>
public static nodeTree buildTree(this HtmlDocument htmlDoc, string __name, Boolean allowTitle = false, Boolean allowMeta = false)
{
    nodeTree output = new nodeTree(__name, htmlDoc);

    // Elements that contain text directly and have no element children.
    XPathNodeIterator leaves = htmlDoc.CreateNavigator().Select("//*[text()][count(*)=0]");
    while (leaves.MoveNext())
    {
        HtmlNodeNavigator htmlNavigator = leaves.Current as HtmlNodeNavigator;
        string sp = htmlNavigator.CurrentNode.XPath.Replace("/", "\\");

        if (!isTagAcceptable(htmlNavigator.CurrentNode, null, allowTitle, allowMeta))
        {
            continue;
        }

        string cn = htmlNavigator.CurrentNode.InnerText.htmlContentProcess().Trim();
        if (cn.isNullOrEmptyString())
        {
            continue;
        }

        graphWrapNode<htmlWrapper> nn = output.Add(sp, htmlNavigator.CurrentNode.Clone());
        nn.item.content = cn;
        nn.item.xPath = sp;
        nn.item.path = nn.path;
    }

    return output;
}
/// <summary>
/// Loads the page at <c>url</c> and reads VIN, price, make, model and photo URL for the
/// product block whose <c>data-index-position</c> is 1.
/// </summary>
/// <returns>A <c>ScrapeInfo</c> populated from the page.</returns>
/// <remarks>
/// NOTE(review): the price and photo queries start with "//", i.e. they are not relative to the
/// product block — confirm that is intended.
/// </remarks>
private static ScrapeInfo useXPath()
{
    ScrapeInfo values = new ScrapeInfo();

    HtmlWeb webClient = new HtmlWeb();
    HtmlDocument firstInventoryPage = webClient.Load(url);
    var productBlock = firstInventoryPage.DocumentNode.SelectSingleNode("//div[contains(@class,\"hproduct\")][@data-index-position=\"1\"]");
    HtmlNodeNavigator navigator = (HtmlNodeNavigator)productBlock.CreateNavigator();

    var vinNode = navigator.SelectSingleNode("@data-vin");
    values.Vin = vinNode.Value;

    var priceNode = navigator.SelectSingleNode("//span[contains(@class,\"internetPrice\")]//span[@class=\"value\"]/text()");
    values.Price = priceNode.Value;

    var makeNode = navigator.SelectSingleNode("@data-make");
    values.Make = makeNode.Value;

    var modelNode = navigator.SelectSingleNode("@data-model");
    values.Model = modelNode.Value;

    var photoNode = navigator.SelectSingleNode("//div[@class=\"media\"]//img/@src");
    values.PhotoUrl = photoNode.Value;

    return values;
}
/// <summary>
/// Parses the text of the current node as a 32-bit integer.
/// </summary>
/// <param name="settings">Not used by this transformation.</param>
/// <param name="nodeNavigator">Source of the text: its value, or the current node's inner text when the value is null.</param>
/// <param name="logicalParents">Not used by this transformation.</param>
/// <returns>The parsed integer, or null when there is no text or it cannot be parsed.</returns>
public object Transform(Dictionary<string, object> settings, HtmlNodeNavigator nodeNavigator, List<HtmlAgilityPack.HtmlNode> logicalParents)
{
    var text = nodeNavigator?.Value ?? nodeNavigator?.CurrentNode?.InnerText;

    int parsed;
    if (text == null || !int.TryParse(text, out parsed))
    {
        return null;
    }

    return parsed;
}
/// <summary>
/// Measures how much sibling text is collected while walking from the current node up towards
/// the parent of the list (the second-to-last logical parent), or a node selected below it.
/// </summary>
/// <param name="settings">
/// Optional string setting <c>_startingXPath</c>: evaluated against the grandparent to pick the
/// node at which the walk stops.
/// </param>
/// <param name="nodeNavigator">Navigator whose current node is where the walk starts.</param>
/// <param name="logicalParents">Logical parent chain; at least two entries are required for any text to be collected.</param>
/// <returns>
/// The length of the collected, trimmed text; 0 when there are fewer than two logical parents
/// or the starting XPath matches nothing.
/// </returns>
public object Transform(Dictionary<string, object> settings, HtmlNodeNavigator nodeNavigator, List<HtmlAgilityPack.HtmlNode> logicalParents)
{
    var currentNode = nodeNavigator?.CurrentNode;
    if (logicalParents == null || logicalParents.Count < 2)
    {
        return 0;
    }

    // The immediate parent is the list itself; the walk is bounded by the list's parent.
    HtmlAgilityPack.HtmlNode stopNode = logicalParents[logicalParents.Count - 2];
    if (settings != null && settings.ContainsKey("_startingXPath") && ((JValue)settings["_startingXPath"]).Type == JTokenType.String)
    {
        var startingXPath = ((JValue)settings["_startingXPath"]).ToObject<string>();
        var candidates = stopNode.SelectNodes(startingXPath);
        if (candidates == null || candidates.Count < 1)
        {
            return 0;
        }

        stopNode = candidates[0];
    }

    var collected = new StringBuilder();
    var foundParent = false;
    while (currentNode != null && currentNode != stopNode && !foundParent)
    {
        var siblingText = this.GetTextFromSiblings(currentNode, stopNode, ref foundParent);
        if (!string.IsNullOrEmpty(siblingText))
        {
            collected.Append(siblingText);
            collected.Append(" ");
        }

        currentNode = currentNode.ParentNode;
    }

    return collected.ToString().Trim().Length;
}
/// <summary>
/// Crawls the listing pages to collect product and pagination links, then visits every product
/// link and records either a new price for an already known product or a completely new product.
/// </summary>
/// <param name="client">Client used to download the pages.</param>
/// <param name="ProductList">Already known products, passed to <c>IsAlreadyParsed</c>.</param>
/// <returns>The prices, products and pictures added during this run.</returns>
public ParsingResults Parse(System.Net.WebClient client, List<Models.Product> ProductList)
{
    HtmlDocument doc = new HtmlDocument();

    // Index-based on purpose: the bound is re-read on every pass.
    // NOTE(review): AddPages presumably appends to PagesLinks_ while this loop runs — confirm.
    for (int pageIndex = 0; pageIndex < PagesLinks_.Count; pageIndex++)
    {
        try
        {
            string listingHtml = client.DownloadString(PagesLinks_[pageIndex]);
            Console.Write($"{PagesLinks_[pageIndex]} {pageIndex}\n");
            doc.LoadHtml(listingHtml);
            HtmlNodeNavigator listingNavigator = (HtmlNodeNavigator)doc.CreateNavigator();
            AddProducts(listingNavigator.Select(Rules_.DetailsXpath));
            AddPages(listingNavigator.Select(Rules_.PaginationXpath));
        }
        catch (System.Net.WebException)
        {
            // A listing page that fails to download is skipped silently.
        }
    }

    ParsingResults res = new ParsingResults();
    for (int productIndex = 0; productIndex < ProductsLinks_.Count; productIndex++)
    {
        string productHtml = client.DownloadString(ProductsLinks_[productIndex]);
        Console.Write($"{ProductsLinks_[productIndex]} {productIndex}\n");
        doc.LoadHtml(productHtml);
        HtmlNodeNavigator productNavigator = (HtmlNodeNavigator)doc.CreateNavigator();

        var known = IsAlreadyParsed(productHtml, ProductList);
        if (known == null)
        {
            ParseNewProduct(productNavigator, known, res, ProductsLinks_[productIndex]);
            continue;
        }

        // Known product: only record its current price.
        string priceValue = ConnectStrings(productNavigator.Select(Rules_.PriceXpath));
        Models.Price price = new Models.Price()
        {
            Product = known,
            Date = DateTime.Now,
            PriceValue = priceValue
        };
        known.Price = priceValue;
        res.AddedPrices.Add(price);
    }

    return res;
}
/// <summary>
/// Reads the scoring date and the auction start date from the page and parses both with the
/// <c>dd.MM.yyyy HH:mm</c> format via <c>ParseDateUn</c>.
/// </summary>
/// <param name="navigator">Navigator over the tender page.</param>
/// <param name="scoringDate">Receives the parsed scoring date; an empty string is parsed when the cell is missing.</param>
/// <param name="biddingDate">Receives the parsed auction start date; an empty string is parsed when the cell is missing.</param>
private void FillBidAndScorDates(HtmlNodeNavigator navigator, out DateTime scoringDate, out DateTime biddingDate)
{
    var scoringCell = navigator.SelectSingleNode(
        "//td[contains(., 'Дата и время рассмотрения заявок:') or contains(., 'Дата рассмотрения заявок:')]/following-sibling::td");
    var scoringText = scoringCell?.Value?.Trim() ?? "";
    scoringDate = scoringText.ParseDateUn("dd.MM.yyyy HH:mm");

    var biddingCell = navigator.SelectSingleNode(
        "//td[contains(., 'Дата начала аукциона')]/following-sibling::td");
    var biddingText = biddingCell?.Value?.Trim() ?? "";
    biddingDate = biddingText.ParseDateUn("dd.MM.yyyy HH:mm");
}
/// <summary>
/// Splits the inner text of the current node on a separator and returns the parts as a JSON array.
/// </summary>
/// <param name="settings">
/// Optional settings, only consulted when the text is not blank: string <c>_separator</c>
/// (default ",") and boolean <c>_trim</c> (default false; when true every part is de-entitized and trimmed).
/// </param>
/// <param name="nodeNavigator">Navigator whose current node supplies the text; may be null.</param>
/// <param name="logicalParents">Not used by this transformation.</param>
/// <returns>A <c>JArray</c> of the parts, or null when there is no current node or splitting raises an <see cref="ArgumentException"/>.</returns>
public object Transform(Dictionary<string, object> settings, HtmlNodeNavigator nodeNavigator, List<HtmlAgilityPack.HtmlNode> logicalParents)
{
    var node = nodeNavigator?.CurrentNode;
    if (node == null)
    {
        return null;
    }

    var separator = ",";
    var trim = false;
    var text = node.InnerText;

    if (settings != null && !string.IsNullOrWhiteSpace(text))
    {
        if (settings.ContainsKey("_separator") && ((JValue)settings["_separator"]).Type == JTokenType.String)
        {
            separator = settings["_separator"].ToString();
        }

        if (settings.ContainsKey("_trim") && ((JValue)settings["_trim"]).Type == JTokenType.Boolean)
        {
            trim = (bool)((JValue)settings["_trim"]).Value;
        }
    }

    try
    {
        var parts = text.Split(new string[] { separator }, StringSplitOptions.None);
        if (trim)
        {
            for (var index = 0; index < parts.Length; index++)
            {
                parts[index] = HtmlEntity.DeEntitize(parts[index]).Trim();
            }
        }

        return new JArray(parts);
    }
    catch (ArgumentException)
    {
        // Treated as "nothing to return".
        return null;
    }
}
/// <summary>
/// Inserts a single purchase object for the lot, taking the OKPD2 category and the quantity
/// from the page and the name and sum from the arguments.
/// </summary>
/// <param name="connect">Connection the INSERT is executed on. NOTE(review): presumably already open — confirm with the caller.</param>
/// <param name="customerId">Value stored in the <c>id_customer</c> column.</param>
/// <param name="nav">Navigator over the tender page.</param>
/// <param name="idLot">Value stored in the <c>id_lot</c> column.</param>
/// <param name="lotName">Value stored in the <c>name</c> column.</param>
/// <param name="sum">Value stored in the <c>sum</c> column.</param>
private void AddPurObjectFirst(MySqlConnection connect, int customerId, HtmlNodeNavigator nav, int idLot, string lotName, string sum)
{
    var okpd2 = nav.SelectSingleNode(
            "//td[contains(., 'Категория ОКПД2:')]/following-sibling::td/div/b")
        ?.Value?.Trim() ?? "";
    var okpdName = nav.SelectSingleNode(
            "//td[contains(., 'Категория ОКПД2:')]/following-sibling::td/div")
        ?.Value?.ReplaceHtmlEntyty().Trim() ?? "";

    // The category cell contains the code as well; strip it to keep only the name.
    if (!string.IsNullOrEmpty(okpd2))
    {
        okpdName = okpdName.Replace(okpd2, "");
    }

    // A cell containing this marker text is not used as a name.
    if (okpdName.Contains("Показать все"))
    {
        okpdName = "";
    }

    var quantity = nav.SelectSingleNode(
            "//td[contains(., 'Количество:')]/following-sibling::td")
        ?.Value?.Trim().ExtractPrice() ?? "";

    var insertLotitem =
        $"INSERT INTO {AppBuilder.Prefix}purchase_object SET id_lot = @id_lot, id_customer = @id_customer, name = @name, sum = @sum, okpd2_code = @okpd2_code, okpd2_group_code = @okpd2_group_code, okpd2_group_level1_code = @okpd2_group_level1_code, okpd_name = @okpd_name, quantity_value = @quantity_value, customer_quantity_value = @customer_quantity_value, okei = @okei, price = @price";

    // The command is disposable; release it deterministically after execution.
    using (var cmd19 = new MySqlCommand(insertLotitem, connect))
    {
        cmd19.Prepare();
        cmd19.Parameters.AddWithValue("@id_lot", idLot);
        cmd19.Parameters.AddWithValue("@id_customer", customerId);
        cmd19.Parameters.AddWithValue("@name", lotName);
        cmd19.Parameters.AddWithValue("@sum", sum);
        cmd19.Parameters.AddWithValue("@okpd2_code", okpd2);
        cmd19.Parameters.AddWithValue("@okpd2_group_code", "");
        cmd19.Parameters.AddWithValue("@okpd2_group_level1_code", "");
        cmd19.Parameters.AddWithValue("@okpd_name", okpdName);
        cmd19.Parameters.AddWithValue("@quantity_value", quantity);
        cmd19.Parameters.AddWithValue("@customer_quantity_value", quantity);
        cmd19.Parameters.AddWithValue("@okei", "");
        cmd19.Parameters.AddWithValue("@price", "");
        cmd19.ExecuteNonQuery();
    }
}
/// <summary>
/// Returns the inner text of the current node, de-entitized, trimmed and with every match of
/// <c>ExtraWhitespacesRegex</c> replaced by a single space.
/// </summary>
/// <param name="settings">Not used by this transformation.</param>
/// <param name="nodeNavigator">Navigator whose current node supplies the text; may be null.</param>
/// <param name="logicalParents">Not used by this transformation.</param>
/// <returns>The normalised text, or null when there is no current node or no inner text.</returns>
public object Transform(Dictionary<string, object> settings, HtmlNodeNavigator nodeNavigator, List<HtmlAgilityPack.HtmlNode> logicalParents)
{
    var rawText = nodeNavigator?.CurrentNode?.InnerText;
    if (rawText == null)
    {
        return null;
    }

    var decoded = HtmlEntity.DeEntitize(rawText).Trim();
    return ExtraWhitespacesRegex.Replace(decoded, " ");
}
//private void ProgressChanged(object sender, DownloadProgressChangedEventArgs e)
//{
//    progressBar.Value = e.ProgressPercentage;
//}
//private void click_Click(object sender, EventArgs e)
//{
//    WebClient webClient = new WebClient();
//    webClient.DownloadFileCompleted += new AsyncCompletedEventHandler(DownloadCompleted);
//    webClient.DownloadProgressChanged += new DownloadProgressChangedEventHandler(ProgressChanged);
//    string sourceFile = $"http://openload.co/stream/Ki5y8-mPcoE~1558022792~95.108.0.0~mdmeUiAb?";
//    webClient.DownloadFileAsync(new Uri(sourceFile), "test.mp4");
//}
//private void DownloadCompleted(object sender, AsyncCompletedEventArgs e)
//{
//    MessageBox.Show("The download is completed!");
//}

/// <summary>
/// Downloads the page whose address is in the <c>adres</c> text box and replaces the text box
/// content with the value of the element whose id is <c>olvideo_html5_api</c>.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void button1_Click(object sender, EventArgs e)
{
    using (var client = new WebClient())
    {
        string pageAddress = adres.Text;
        string markup = client.DownloadString(pageAddress);

        var document = new HtmlAgilityPack.HtmlDocument();
        document.LoadHtml(markup);
        var navigator = (HtmlNodeNavigator)document.CreateNavigator();

        var videoElement = navigator.SelectSingleNode("//*[@id=\"olvideo_html5_api\"]");
        adres.Text = videoElement.Value;
    }
}
/// <summary>
/// Evaluates an XPath expression against the given HTML and returns the value and path of every match.
/// </summary>
/// <param name="html">Markup to parse.</param>
/// <param name="xpath">XPath expression evaluated from the document node.</param>
/// <returns>One <c>ResultEntity</c> per match, in iteration order; empty when nothing matches.</returns>
private List<ResultEntity> GetNodeValue(string html, string xpath)
{
    var document = new HtmlAgilityPack.HtmlDocument();
    document.LoadHtml(html);
    var navigator = (HtmlNodeNavigator)document.DocumentNode.CreateNavigator();

    var results = new List<ResultEntity>();
    var matches = navigator.Select(xpath);
    while (matches.MoveNext())
    {
        var match = (HtmlNodeNavigator)matches.Current;
        var entity = new ResultEntity();
        entity.Value = match.Value;
        entity.Path = match.CurrentNode.XPath;
        results.Add(entity);
    }

    return results;
}
/// <summary>
/// Adds every lot linked from the page; when the page lists no lots, or none of them was added,
/// a single fallback lot is added instead.
/// </summary>
/// <param name="htmlDoc">Document the lot links are searched in.</param>
/// <param name="navigator">Navigator passed through to <c>AddLot</c> / <c>AddOneLot</c>.</param>
/// <param name="connect">Connection passed through to <c>AddLot</c> / <c>AddOneLot</c>.</param>
/// <param name="idTender">Tender id passed through to <c>AddLot</c> / <c>AddOneLot</c>.</param>
/// <param name="customerId">Customer id passed through to <c>AddLot</c> / <c>AddOneLot</c>.</param>
private void AddLots(HtmlDocument htmlDoc, HtmlNodeNavigator navigator, MySqlConnection connect, int idTender, int customerId)
{
    var lotAdded = false;
    // SelectNodes returns null when nothing matches; substitute an empty collection.
    var lots = htmlDoc.DocumentNode.SelectNodes(
                   "//div[@class = 'expandable-text short']//a[contains(., 'Лот №')]") ??
               new HtmlNodeCollection(null);
    foreach (var lot in lots)
    {
        AddLot(navigator, connect, idTender, customerId, lot, out var lotWasAdded);
        // Accumulate instead of overwrite: previously only the LAST lot decided the outcome,
        // so a failure on the final lot triggered the fallback even though earlier lots were added.
        lotAdded |= lotWasAdded;
    }

    if (lots.Count < 1 || !lotAdded)
    {
        AddOneLot(navigator, connect, idTender, customerId);
    }
}