コード例 #1
0
        /// <summary>
        /// Reads the delivery place and delivery term cells from the rows following the lot row and,
        /// when at least one of them is non-empty, inserts a <c>customer_requirement</c> record.
        /// </summary>
        /// <param name="lotNav">Navigator whose current node is the context for the relative XPath lookups.</param>
        /// <param name="connect">Connection the INSERT command is executed on.</param>
        /// <param name="idLot">Value bound to <c>@id_lot</c>.</param>
        /// <param name="customerId">Value bound to <c>@id_customer</c>.</param>
        private void GetCusRequirements(HtmlNodeNavigator lotNav, MySqlConnection connect, int idLot, int customerId)
        {
            var delivPlace = (lotNav
                              .SelectSingleNode(
                                  "./following-sibling::tr/th[contains(text(), 'Место поставки товара, выполнения работ, оказания услуг')]/following-sibling::td/div")
                              ?.Value ?? "").ReplaceHtmlEntyty().Trim();
            var delivTerm = (lotNav
                             .SelectSingleNode(
                                 "./following-sibling::tr/th[contains(text(), 'Срок поставки')]/following-sibling::td")
                             ?.Value ?? "").Trim();

            // Nothing to store when both cells are missing or empty.
            if (string.IsNullOrEmpty(delivTerm) && string.IsNullOrEmpty(delivPlace))
            {
                return;
            }

            var insertCustomerRequirement =
                $"INSERT INTO {AppBuilder.Prefix}customer_requirement SET id_lot = @id_lot, id_customer = @id_customer, delivery_place = @delivery_place, max_price = @max_price, delivery_term = @delivery_term";

            // The command is IDisposable; release it as soon as the statement has run.
            using (var cmd16 = new MySqlCommand(insertCustomerRequirement, connect))
            {
                cmd16.Prepare();
                cmd16.Parameters.AddWithValue("@id_lot", idLot);
                cmd16.Parameters.AddWithValue("@id_customer", customerId);
                cmd16.Parameters.AddWithValue("@delivery_place", delivPlace);
                // max_price is always written as an empty string by this method.
                cmd16.Parameters.AddWithValue("@max_price", "");
                cmd16.Parameters.AddWithValue("@delivery_term", delivTerm);
                cmd16.ExecuteNonQuery();
            }
        }
コード例 #2
0
        /// <summary>
        /// Joins the inner text of every descendant text node of the current node with single spaces.
        /// </summary>
        /// <param name="settings">Not used by this transformation.</param>
        /// <param name="nodeNavigator">Navigator positioned on the node to extract text from; may be null.</param>
        /// <param name="logicalParents">Not used by this transformation.</param>
        /// <returns>
        /// The joined text. When no text was collected, the navigator's <c>Value</c> is returned if it is
        /// not null; otherwise an empty string.
        /// </returns>
        public object Transform(Dictionary <string, object> settings, HtmlNodeNavigator nodeNavigator, List <HtmlAgilityPack.HtmlNode> logicalParents)
        {
            var ret = new StringBuilder();

            // HtmlAgilityPack returns null (not an empty collection) when the XPath matches nothing,
            // so the result must be checked before it is enumerated.
            var textNodes = nodeNavigator?.CurrentNode?.SelectNodes(".//text()");

            if (textNodes != null)
            {
                foreach (HtmlNode node in textNodes)
                {
                    var nodeInnerText = node.InnerText;

                    if (nodeInnerText == null)
                    {
                        continue;
                    }

                    if (ret.Length > 0)
                    {
                        ret.Append(" ");
                    }

                    ret.Append(nodeInnerText);
                }
            }

            if (ret.Length == 0)
            {
                // Fall back to the navigator value (e.g. when there are no text descendants).
                var navVal = nodeNavigator?.Value;

                if (navVal != null)
                {
                    return(navVal);
                }
            }

            return(ret.ToString());
        }
コード例 #3
0
        /// <summary>
        /// Inserts one <c>purchase_object</c> record for every text node found in the second cell of the lot row.
        /// </summary>
        /// <param name="lotNav">Navigator whose current node is the context for the relative XPath lookups.</param>
        /// <param name="connect">Connection the INSERT commands are executed on.</param>
        /// <param name="idLot">Value bound to <c>@id_lot</c>.</param>
        /// <param name="customerId">Value bound to <c>@id_customer</c>.</param>
        private void GetPurchaseObjects(HtmlNodeNavigator lotNav, MySqlConnection connect,
                                        int idLot,
                                        int customerId)
        {
            // The predicate previously read "[.contains(...)]", which is not valid XPath and made
            // SelectSingleNode throw; the function call must not be prefixed with a dot.
            var okpd2 = (lotNav
                         .SelectSingleNode(
                             "./following-sibling::tr/th[contains(text(), 'Код ОКРБ')]/following-sibling::td")
                         ?.Value ?? "").Trim();
            var purObjects = lotNav.Select(
                "./td[2]/text()");

            if (purObjects is null)
            {
                return;
            }

            // The statement text does not depend on the row, so build it once.
            var insertLotitem =
                $"INSERT INTO {AppBuilder.Prefix}purchase_object SET id_lot = @id_lot, id_customer = @id_customer, name = @name, quantity_value = @quantity_value, okei = @okei, customer_quantity_value = @customer_quantity_value, price = @price, sum = @sum, okpd2_code = @okpd2_code";

            foreach (XPathNavigator po in purObjects)
            {
                var namePo = po?.Value?.ReplaceHtmlEntyty()?.Trim() ?? "";

                // Dispose each command once its row has been written.
                using (var cmd19 = new MySqlCommand(insertLotitem, connect))
                {
                    cmd19.Prepare();
                    cmd19.Parameters.AddWithValue("@id_lot", idLot);
                    cmd19.Parameters.AddWithValue("@id_customer", customerId);
                    cmd19.Parameters.AddWithValue("@name", namePo);
                    // Quantity, unit, price and sum are always written as empty strings here.
                    cmd19.Parameters.AddWithValue("@quantity_value", "");
                    cmd19.Parameters.AddWithValue("@okei", "");
                    cmd19.Parameters.AddWithValue("@customer_quantity_value", "");
                    cmd19.Parameters.AddWithValue("@price", "");
                    cmd19.Parameters.AddWithValue("@sum", "");
                    cmd19.Parameters.AddWithValue("@okpd2_code", okpd2);
                    cmd19.ExecuteNonQuery();
                }
            }
        }
コード例 #4
0
        /// <summary>
        /// Builds a new product, its current price entry and one picture entry per image URL from the
        /// page behind <paramref name="navigator"/>, and appends them to <paramref name="res"/>.
        /// </summary>
        /// <param name="navigator">Navigator over the product page.</param>
        /// <param name="product">Overwritten locally with the newly created product; the caller's reference is not changed.</param>
        /// <param name="res">Result container receiving the new pictures, price and product.</param>
        /// <param name="link">Link stored on the new product.</param>
        private void ParseNewProduct(HtmlNodeNavigator navigator, Models.Product product, ParsingResults res, string link)
        {
            string name = ConnectStrings(navigator.Select(Rules_.HeaderXpath));
            string details = ConnectStrings(navigator.Select(Rules_.DescriptionXpath));
            string priceText = ConnectStrings(navigator.Select(Rules_.PriceXpath));
            List<string> imageUrls = ListValue(navigator.Select(Rules_.PicturesXpath));

            // The first picture (if any) doubles as the product's main image.
            string mainImage = imageUrls.Count > 0 ? imageUrls[0] : "";

            product = new Models.Product
            {
                Description = details,
                Name = name,
                Link = link,
                ImageUrl = mainImage,
                Price = priceText
            };

            var currentPrice = new Models.Price
            {
                Product = product,
                Date = DateTime.Now,
                PriceValue = priceText
            };

            List<Models.Picture> pictureEntries = imageUrls.ConvertAll(
                url => new Models.Picture { Product = product, PictureUrl = url });

            res.AddedPictures.AddRange(pictureEntries);
            res.AddedPrices.Add(currentPrice);
            res.AddedProducts.Add(product);
        }
コード例 #5
0
    /// <summary>
    /// Converts an argument of varying shape into a single string.
    /// </summary>
    /// <param name="argument">A string, an <see cref="XPathNodeIterator"/>, another <see cref="IEnumerable"/>, or any other object.</param>
    /// <param name="outer">For iterator nodes: true takes <c>OuterHtml</c>, false takes <c>InnerHtml</c>.</param>
    /// <param name="separator">Text placed between pieces once output is non-empty; null disables it.</param>
    /// <returns>
    /// null for a null argument, an iterator without nodes, or an empty sequence; the string itself for
    /// a string; the concatenated pieces for iterators and sequences; otherwise the argument formatted
    /// with <c>"{0}"</c>.
    /// </returns>
    public static string ConvertToString(object argument, bool outer, string separator)
    {
        if (argument == null)
        {
            return null;
        }

        string text = argument as string;
        if (text != null)
        {
            return text;
        }

        XPathNodeIterator nodes = argument as XPathNodeIterator;
        if (nodes != null)
        {
            if (!nodes.MoveNext())
            {
                return null;
            }

            StringBuilder html = new StringBuilder();

            // The iterator is already on its first node, so process before advancing.
            for (bool hasNode = true; hasNode; hasNode = nodes.MoveNext())
            {
                HtmlNodeNavigator nav = nodes.Current as HtmlNodeNavigator;
                if (nav == null || nav.CurrentNode == null)
                {
                    continue;
                }

                if (separator != null && html.Length > 0)
                {
                    html.Append(separator);
                }

                html.Append(outer ? nav.CurrentNode.OuterHtml : nav.CurrentNode.InnerHtml);
            }

            return html.ToString();
        }

        IEnumerable items = argument as IEnumerable;
        if (items != null)
        {
            // Stays null for an empty sequence so that null (not "") is returned.
            StringBuilder joined = null;

            foreach (object item in items)
            {
                joined = joined ?? new StringBuilder();

                if (separator != null && joined.Length > 0)
                {
                    joined.Append(separator);
                }

                string piece = ConvertToString(item, outer, separator);
                if (piece != null)
                {
                    joined.Append(piece);
                }
            }

            return joined == null ? null : joined.ToString();
        }

        return string.Format("{0}", argument);
    }
コード例 #6
0
        /// <summary>
        /// Parses <paramref name="html"/> and refills the <c>Names</c> and <c>Ids</c> collections from the
        /// version table: one entry per link in the second column, labelled with the text of the
        /// matching fifth-column cell.
        /// </summary>
        /// <param name="html">Markup of the page containing the versions table.</param>
        /// <remarks>
        /// The table is looked up at two fixed absolute paths (section <c>div[2]</c>, then <c>div[3]</c>).
        /// When neither matches, an error is shown and both collections stay empty.
        /// </remarks>
        private async Task PopulateVersions(string html)
        {
            var doc = new HtmlDocument();

            doc.LoadHtml(html);

            Names.Clear();
            Ids.Clear();

            var nav  = doc.CreateNavigator();
            var res  = nav.Select("/html/body/div[1]/main/div[1]/div[2]/section/div/div/div/section/div[2]/div[2]/div/div/table/tbody/tr[*]/td[2]/a");
            var res2 = nav.Select("/html/body/div[1]/main/div[1]/div[2]/section/div/div/div/section/div[2]/div[2]/div/div/table/tbody/tr[*]/td[5]/div/div");

            if (res.Count == 0)
            {
                // Alternative layout: the table sits one section child further down.
                res  = nav.Select("/html/body/div[1]/main/div[1]/div[2]/section/div/div/div/section/div[3]/div[2]/div/div/table/tbody/tr[*]/td[2]/a[1]");
                res2 = nav.Select("/html/body/div[1]/main/div[1]/div[2]/section/div/div/div/section/div[3]/div[2]/div/div/table/tbody/tr[*]/td[5]/div/div");
            }

            if (res.Count == 0)
            {
                Global.ShowError("Failed to load versions...");
                return;
            }

            foreach (HtmlNodeNavigator node in res)
            {
                var urlsplit = node.CurrentNode.GetAttributeValue("href", "").Split('/');

                // res2 is advanced in lock-step with res. When it runs out of cells, fall back to the
                // bare link text instead of reading a stale or invalid Current.
                var versionNode = res2.MoveNext() ? res2.Current as HtmlNodeNavigator : null;

                var displayName = node.CurrentNode.InnerText;

                if (versionNode != null)
                {
                    displayName += " - " + versionNode.CurrentNode.InnerText.Replace("\r\n", "").Trim();
                }

                Names.Add(displayName);
                // The id is the last path segment of the link target.
                Ids.Add(urlsplit[urlsplit.Length - 1]);
            }
        }
コード例 #7
0
        /// <summary>
        /// Parses <c>htmlText</c>, evaluates <c>HtmlPath</c> against it and stores the first match in
        /// <c>ResultNode</c> and its string form in <c>ResultString</c>.
        /// </summary>
        /// <returns>
        /// true when a node was found; false when the XPath is invalid (the exception is logged) or
        /// nothing matched.
        /// </returns>
        protected bool RunHtmlPathSearch()
        {
            var parsedHtml = new HtmlDocument();
            parsedHtml.LoadHtml(htmlText);
            var rootNavigator = (HtmlNodeNavigator)parsedHtml.CreateNavigator();

            // Matching on partial attribute values:
            // https://stackoverflow.com/questions/1390568/how-can-i-match-on-an-attribute-that-contains-a-certain-string
            // Sample htmlPath to get a download link: @"//a[contains(@class, 'ModDetails_hidden')]//@href"
            Logging.Debug(LogOptions.ClassName, "Searching using html path: {0}", HtmlPath);

            try
            {
                ResultNode = rootNavigator.SelectSingleNode(HtmlPath) as HtmlNodeNavigator;
            }
            catch (XPathException ex)
            {
                Logging.Exception(ex.ToString());
                return false;
            }

            if (ResultNode == null)
            {
                Logging.Info(LogOptions.ClassName, "Result was not found");
                return false;
            }

            Logging.Info(LogOptions.ClassName, "HtmlPath results in node value '{0}' of type '{1}'", ResultNode.InnerXml, ResultNode.NodeType.ToString());
            Logging.Info(LogOptions.ClassName, "Result value as text: {0}\nResult inner html: {1}\nResult outer html: {2}", ResultNode.Value, ResultNode.InnerXml, ResultNode.OuterXml);
            ResultString = ResultNode.ToString();
            return true;
        }
コード例 #8
0
 /// <summary>
 /// Resolves the organizer id for <c>_tn.OrgName</c>: looks it up by full name and, when no row
 /// exists, inserts a new organizer with contact details scraped from the page.
 /// </summary>
 /// <param name="connect">Connection the SELECT/INSERT commands are executed on.</param>
 /// <param name="navigator">Navigator over the page the contact details are read from.</param>
 /// <param name="organiserId">Id of the existing or newly inserted organizer; 0 when <c>_tn.OrgName</c> is empty.</param>
 private void AddOrganizer(MySqlConnection connect, HtmlNodeNavigator navigator, out int organiserId)
 {
     organiserId = 0;
     if (string.IsNullOrEmpty(_tn.OrgName))
     {
         return;
     }

     var selectOrg =
         $"SELECT id_organizer FROM {AppBuilder.Prefix}organizer WHERE full_name = @full_name";

     // Command, adapter and table are all IDisposable; release them once the lookup is done.
     using (var cmd3 = new MySqlCommand(selectOrg, connect))
     using (var adapter3 = new MySqlDataAdapter { SelectCommand = cmd3 })
     using (var dt3 = new DataTable())
     {
         cmd3.Prepare();
         cmd3.Parameters.AddWithValue("@full_name", _tn.OrgName);
         adapter3.Fill(dt3);
         if (dt3.Rows.Count > 0)
         {
             organiserId = (int)dt3.Rows[0].ItemArray[0];
             return;
         }
     }

     // Unknown organizer: collect whatever contact cells the page offers (missing ones become "").
     var phone = navigator.SelectSingleNode(
         "//td[contains(., 'Номер контактного телефона заказчика')]/following-sibling::td")
                 ?.Value?.Trim() ??
                 "";
     var email = navigator.SelectSingleNode(
         "//td[contains(., 'Контактный адрес e-mail:')]/following-sibling::td")
                 ?.Value
                 ?.Trim() ??
                 "";
     var contactPerson = navigator.SelectSingleNode(
         "//td[contains(., 'Контактное лицо:') or contains(., 'Ответственное лицо:')]/following-sibling::td")
                         ?.Value?.Trim() ??
                         "";
     var postAddr = navigator.SelectSingleNode(
         "//td[contains(., 'Почтовый адрес заказчика:')]/following-sibling::td")
                    ?.Value?.Trim() ??
                    "";
     var address = navigator.SelectSingleNode(
         "//td[contains(., 'Местонахождение заказчика:')]/following-sibling::td")
                   ?.Value?.Trim() ?? "";

     var addOrganizer =
         $"INSERT INTO {AppBuilder.Prefix}organizer SET full_name = @full_name, contact_phone = @contact_phone, contact_person = @contact_person, contact_email = @contact_email, post_address = @post_address, fact_address = @fact_address";
     using (var cmd4 = new MySqlCommand(addOrganizer, connect))
     {
         cmd4.Prepare();
         cmd4.Parameters.AddWithValue("@full_name", _tn.OrgName);
         cmd4.Parameters.AddWithValue("@contact_phone", phone);
         cmd4.Parameters.AddWithValue("@contact_person", contactPerson);
         cmd4.Parameters.AddWithValue("@contact_email", email);
         cmd4.Parameters.AddWithValue("@post_address", postAddr);
         cmd4.Parameters.AddWithValue("@fact_address", address);
         cmd4.ExecuteNonQuery();
         organiserId = (int)cmd4.LastInsertedId;
     }
 }
コード例 #9
0
 /// <summary>
 /// Fills <c>_tn.PurName</c> when it is still empty, combining <c>_tn.FullPw</c> with the trimmed
 /// text of the first <c>div</c> whose class is exactly <c>s2</c> (empty text when absent).
 /// </summary>
 /// <param name="navigator">Navigator over the page to read the heading from.</param>
 private void FillPurName(HtmlNodeNavigator navigator)
 {
     // An already populated name is left untouched.
     if (!string.IsNullOrEmpty(_tn.PurName))
     {
         return;
     }

     var headingNode = navigator.SelectSingleNode("//div[@class = 's2']");
     var headingText = headingNode?.Value?.Trim() ?? "";

     _tn.PurName = $"{_tn.FullPw} {headingText}";
 }
コード例 #10
0
        /// <summary>
        /// Reads the raw text of the current node and hands it to <c>ParseDate</c>.
        /// </summary>
        /// <param name="settings">Passed through to <c>ParseDate</c>.</param>
        /// <param name="nodeNavigator">Navigator positioned on the node holding the date text; may be null.</param>
        /// <param name="logicalParents">Not used by this transformation.</param>
        /// <returns>The result of <c>ParseDate</c>, or null when no text is available.</returns>
        public object Transform(Dictionary <string, object> settings, HtmlNodeNavigator nodeNavigator, List <HtmlAgilityPack.HtmlNode> logicalParents)
        {
            string dateText = null;

            if (nodeNavigator != null)
            {
                // Prefer the navigator value; fall back to the node's inner text.
                dateText = nodeNavigator.Value ?? nodeNavigator.CurrentNode?.InnerText;
            }

            if (dateText == null)
            {
                return null;
            }

            return ParseDate(settings, dateText);
        }
コード例 #11
0
        /// <summary>
        /// Walks backwards over the previous siblings of the current node looking for a short piece of
        /// text that can serve as a title.
        /// </summary>
        /// <param name="settings">
        /// Optional overrides: integer <c>_maxStepsUpward</c> (default 3) limits how many siblings are
        /// inspected, integer <c>_maxTitleLength</c> (default 200) limits the accepted text length.
        /// </param>
        /// <param name="nodeNavigator">Navigator positioned on the node whose preceding siblings are searched; may be null.</param>
        /// <param name="logicalParents">Not used by this transformation.</param>
        /// <returns>
        /// The decoded, trimmed text of the first allowed sibling that has text, when it is no longer
        /// than the maximum length; otherwise null.
        /// </returns>
        public object Transform(Dictionary <string, object> settings, HtmlNodeNavigator nodeNavigator, List <HtmlAgilityPack.HtmlNode> logicalParents)
        {
            var node = nodeNavigator?.CurrentNode;

            // Without a starting node there are no siblings to inspect (previously a NullReferenceException).
            if (node == null)
            {
                return(null);
            }

            var level          = 0;
            var maxLevel       = 3;
            var maxTitleLength = 200;

            if (settings != null && settings.ContainsKey("_maxStepsUpward") && ((JValue)settings["_maxStepsUpward"]).Type == JTokenType.Integer)
            {
                maxLevel = ((JValue)settings["_maxStepsUpward"]).ToObject <int>();
            }

            if (settings != null && settings.ContainsKey("_maxTitleLength") && ((JValue)settings["_maxTitleLength"]).Type == JTokenType.Integer)
            {
                maxTitleLength = ((JValue)settings["_maxTitleLength"]).ToObject <int>();
            }

            HtmlAgilityPack.HtmlNode sibling = node;

            do
            {
                level++;
                sibling = sibling.PreviousSibling;

                if (sibling != null && this.IsAllowedTypeRecursive(sibling))
                {
                    var siblingInnerText = sibling.InnerText;

                    if (!string.IsNullOrWhiteSpace(siblingInnerText))
                    {
                        var text = HtmlEntity.DeEntitize(siblingInnerText).Trim();

                        // Text that is empty after decoding and trimming is not a candidate; keep
                        // searching instead of returning an empty title.
                        if (text.Length > 0)
                        {
                            // We stop at the first real candidate: it is either short enough to be the
                            // title, or there is no title.
                            return(text.Length <= maxTitleLength ? text : null);
                        }
                    }
                }

                // At this point we did not return any text, so the text node is empty, or this is a comment.
                // We will decrement level so we ignore this node completely
                if (sibling != null && (sibling.NodeType == HtmlAgilityPack.HtmlNodeType.Text || sibling.NodeType == HtmlAgilityPack.HtmlNodeType.Comment))
                {
                    level--;
                }
            }while (sibling != null && level < maxLevel);

            return(null);
        }
コード例 #12
0
        /// <summary>
        /// URL-decodes the text of the current node.
        /// </summary>
        /// <param name="settings">Not used by this transformation.</param>
        /// <param name="nodeNavigator">Navigator positioned on the node holding the encoded text; may be null.</param>
        /// <param name="logicalParents">Not used by this transformation.</param>
        /// <returns>The decoded text, or null when no text is available.</returns>
        public object Transform(Dictionary <string, object> settings, HtmlNodeNavigator nodeNavigator, List <HtmlAgilityPack.HtmlNode> logicalParents)
        {
            string encoded = null;

            if (nodeNavigator != null)
            {
                // Prefer the navigator value; fall back to the node's inner text.
                encoded = nodeNavigator.Value ?? nodeNavigator.CurrentNode?.InnerText;
            }

            if (encoded == null)
            {
                return null;
            }

            return WebUtility.UrlDecode(encoded);
        }
コード例 #13
0
ファイル: HttpClient.cs プロジェクト: RhaLabs/EvocoWebCrawler
        /// <summary>
        /// Parses an attachment page and downloads every file it links to by posting the form back
        /// once per link.
        /// </summary>
        /// <param name="htmlDocumentStream">Stream containing the HTML of the attachment page.</param>
        /// <returns>
        /// One pair per <c>&lt;a title="..."&gt;</c> element: the key is the title prefixed with
        /// <c>..\</c>, the value is the response body stream. The list is empty when the page has no
        /// <c>attachForm</c> form or no such links. The caller owns the returned streams.
        /// </returns>
        public List <KeyValuePair <string, System.IO.Stream> > GetHttpFileAttachment(System.IO.Stream htmlDocumentStream)
        {
            var navigator = new HtmlNodeNavigator(htmlDocumentStream);

            // get new view tokens
            this.TryGetTokens(navigator, out this.viewState, out this.eventValidation);

            var attachments = new List <KeyValuePair <string, System.IO.Stream> >();

            // SelectSingleNode returns null when the form is absent; without a form there is nothing
            // to post to, so report "no attachments" instead of failing with a NullReferenceException.
            var attachForm = navigator.CurrentNode.SelectSingleNode(".//form[@name='attachForm']");

            if (attachForm == null)
            {
                return(attachments);
            }

            var formAction = attachForm.GetAttributeValue("action", "").Replace("&amp;", "&");

            var uriToFile = "http://www.bldgportal.com/RFI/Application/" + formAction;

            // all <a> elements with the "title" attribute
            var fileNodes = navigator.CurrentNode.SelectNodes(@".//a[@title]");

            if (fileNodes == null)
            {
                return(attachments);
            }

            foreach (var fileLink in fileNodes)
            {
                // The postback target is the element id with '_' replaced by '$'.
                var target = fileLink.Id.Replace('_', '$');

                var postData = new List <KeyValuePair <string, string> >
                {
                    new KeyValuePair <string, string>("__VIEWSTATE", viewState),
                    new KeyValuePair <string, string>("__EVENTVALIDATION", eventValidation),
                    new KeyValuePair <string, string>("__EVENTTARGET", target),
                    new KeyValuePair <string, string>("__EVENTARGUMENT", ""),
                    new KeyValuePair <string, string>("validFileTypes", "doc, docx, xls, xlsx, xlsm, dwg, dwgx, dwf, dwfx, bmp, gif, jpg, jpeg, tif, tiff, pdf, txt, rtf, ai"),
                    new KeyValuePair <string, string>("fileID", "")
                };

                // The request content can be released as soon as the response has arrived. The
                // response itself is NOT disposed here because its body stream is handed to the caller.
                using (var content = new FormUrlEncodedContent(postData))
                {
                    var response = client.PostAsync(uriToFile, content).Result;

                    attachments.Add(new KeyValuePair <string, System.IO.Stream>
                                        (String.Format(@"..\{0}", fileLink.GetAttributeValue("title", "")),
                                        response.Content.ReadAsStreamAsync().Result));
                }
            }

            return(attachments);
        }
コード例 #14
0
        /// <summary>
        /// Extracts the text of one field from a single item's base node, using an XPath relative to
        /// that node, and runs it through <c>TextCleaner.FullClean</c>.
        /// </summary>
        /// <param name="BaseNode">Root node of one item.</param>
        /// <param name="RelXPath">XPath relative to <paramref name="BaseNode"/>; when blank, the base node's own text is used.</param>
        /// <param name="postion">Not used by this method.</param>
        /// <param name="CleanConnectionMark">
        /// true calls <c>FullClean</c> with its default options; false calls the overload with the
        /// explicit flag set <c>(true, true, true, false, true, false)</c>.
        /// </param>
        /// <returns>
        /// The cleaned text, or null when <paramref name="BaseNode"/> is null or the XPath yields no
        /// node with text.
        /// </returns>
        internal static string ExtractInnerTextFromBaseNode(HtmlNode BaseNode, string RelXPath, int postion, bool CleanConnectionMark = true)
        {
            if (BaseNode == null)
            {
                return(null);
            }

            string innerTextValue;

            if (string.IsNullOrWhiteSpace(RelXPath))
            {
                innerTextValue = XPathUtility.InnerTextNonDescendants(BaseNode);
            }
            else
            {
                innerTextValue = "";

                try
                {
                    HtmlNodeNavigator navigator = (HtmlNodeNavigator)BaseNode.CreateNavigator();
                    var node = navigator.SelectSingleNode(RelXPath);

                    // A missing node is an expected outcome, not an error: test for it instead of
                    // relying on a swallowed NullReferenceException.
                    if (node != null)
                    {
                        innerTextValue = node.Value;
                    }
                }
                catch (Exception)
                {
                    // Best effort: any failure of the navigator lookup falls through to the
                    // node-collection lookup below.
                }

                if (string.IsNullOrWhiteSpace(innerTextValue))
                {
                    // Fallback: first matching node that has text of its own. SelectNodes returns
                    // null when nothing matches.
                    var matchNodes = BaseNode.SelectNodes(RelXPath);
                    HtmlNode firstWithText = matchNodes == null
                        ? null
                        : matchNodes.FirstOrDefault(n => !string.IsNullOrEmpty(XPathUtility.InnerTextNonDescendants(n)));

                    if (firstWithText == null)
                    {
                        return(null);
                    }

                    innerTextValue = XPathUtility.InnerTextNonDescendants(firstWithText);
                }
            }

            if (CleanConnectionMark)
            {
                return(TextCleaner.FullClean(innerTextValue));
            }
            else
            {
                return(TextCleaner.FullClean(innerTextValue, true, true, true, false, true, false));
            }
        }
コード例 #15
0
        /// <summary>
        /// Evaluates <paramref name="expr"/> against the wrapped node and wraps every match.
        /// </summary>
        /// <param name="expr">Compiled XPath expression to evaluate.</param>
        /// <returns>An array with one <c>HtmlNodeWrapper</c> per selected node, in iteration order.</returns>
        public override INode[] SelectNodes(XPathExpression expr)
        {
            var matches = Wrapped.CreateNavigator().Select(expr);
            var wrapped = new INode[matches.Count];

            // slot mirrors the iterator's 1-based CurrentPosition shifted to a 0-based index.
            for (var slot = 0; matches.MoveNext(); slot++)
            {
                wrapped[slot] = new HtmlNodeWrapper((HtmlNodeNavigator)matches.Current);
            }

            return wrapped;
        }
コード例 #16
0
        /// <summary>
        /// Builds the notice version text from the comments cell and the cell describing how the
        /// procurement documentation is provided.
        /// </summary>
        /// <param name="navigator">Navigator over the page to read both cells from.</param>
        /// <param name="noticeVer">
        /// Receives the comments, a line break, the fixed documentation label and the documentation
        /// text, trimmed as a whole. Missing cells contribute empty text.
        /// </param>
        private void FillNoticeVer(HtmlNodeNavigator navigator, out string noticeVer)
        {
            var commentsCell = navigator.SelectSingleNode("//td[b[. = 'Комментарии:']]");
            var comments = commentsCell?.Value?.Trim() ?? "";

            var documentationCell = navigator.SelectSingleNode(
                "//td[contains(., 'Порядок предоставления документации по закупке:')]/following-sibling::td");
            var providingDocumentation = documentationCell?.Value?.Trim() ?? "";

            noticeVer = $"{comments}\nПорядок предоставления документации по закупке: {providingDocumentation}".Trim();
        }
コード例 #17
0
        /// <summary>
        /// Decodes HTML entities in the navigator value, trims it and passes it to
        /// <c>RemovePatternsFromText</c>.
        /// </summary>
        /// <param name="settings">Stored on the instance and loaded via <c>LoadSettings</c> before the text is processed.</param>
        /// <param name="nodeNavigator">Navigator positioned on the node holding the text; may be null.</param>
        /// <param name="logicalParents">Not used by this transformation.</param>
        /// <returns>The result of <c>RemovePatternsFromText</c>, or null when the navigator has no value.</returns>
        public object Transform(Dictionary <string, object> settings, HtmlNodeNavigator nodeNavigator, List <HtmlNode> logicalParents)
        {
            string rawText = nodeNavigator?.Value;

            // Settings are only (re)loaded when there is text to process.
            if (rawText == null)
            {
                return null;
            }

            this.settings = settings;
            LoadSettings();

            string decoded = HtmlEntity.DeEntitize(rawText).Trim();
            return RemovePatternsFromText(decoded);
        }
コード例 #18
0
ファイル: HttpClient.cs プロジェクト: RhaLabs/EvocoWebCrawler
        /// <summary>
        /// Logs in by posting the credentials form found at <paramref name="uri"/>, then loads the
        /// portal main page and extracts the session id into <c>GlobalEvocoId</c>.
        /// </summary>
        /// <param name="uri">Address of the login page; the form is posted to the same address with <c>?TIMEOUT=true</c> appended.</param>
        /// <exception cref="System.InvalidOperationException">
        /// The portal page contains no <c>&lt;a onclick="..."&gt;</c> element to read the session id from.
        /// </exception>
        /// <remarks>
        /// Writes <c>..\login.html</c> and <c>..\portal.html</c> as a side effect. When the onclick
        /// handler does not match the SID pattern, <c>GlobalEvocoId</c> is set to an empty string.
        /// </remarks>
        private void LogMeIn(string uri)
        {
            var body = this.Navigate(uri);

            var navigator = new HtmlNodeNavigator(body);

            this.TryGetTokens(navigator, out this.viewState, out this.eventValidation);

            var postData = new List <KeyValuePair <string, string> >
            {
                new KeyValuePair <string, string>("__VIEWSTATE", viewState),
                new KeyValuePair <string, string>("__EVENTVALIDATION", eventValidation),
                new KeyValuePair <string, string>("UserNameTextBox", creds.Username),
                new KeyValuePair <string, string>("PasswordTextBox", creds.Password),
                new KeyValuePair <string, string>("ConfCheckBox", "on"),
                new KeyValuePair <string, string>("LoginButton", "Authenticate"),
                new KeyValuePair <string, string>("RequestPasswordTextBox", "")
            };

            // The response body is not read by this method, so request content and response are
            // released as soon as the POST has completed.
            using (var content = new FormUrlEncodedContent(postData))
            using (client.PostAsync(uri + "?TIMEOUT=true", content).Result)
            {
            }

            // NOTE(review): this re-parses `body`, which was fetched before the POST, so login.html
            // holds the pre-login page rather than the POST response - confirm that is intended.
            navigator = new HtmlNodeNavigator(body);

            var doc = navigator.CurrentDocument;

            doc.Save(@"..\login.html");

            uri = @"https://www.bldgportal.com/Application/PortalMain.aspx";

            navigator = new HtmlNodeNavigator(this.Navigate(uri));

            doc = navigator.CurrentDocument;
            doc.Save(@"..\portal.html");

            // The result is deliberately ignored, as before: the link search below runs from
            // whichever node the navigator is positioned on after this call.
            navigator.MoveToId("UserIdTable");

            var links = navigator.CurrentNode.SelectNodes(@".//a[@onclick]");

            // SelectNodes returns null when nothing matches; fail with a clear message instead of a
            // NullReferenceException.
            if (links == null || links.Count == 0)
            {
                throw new System.InvalidOperationException("Login failed: the portal page contains no link with an onclick handler to read the SID from.");
            }

            var nodeLin = links[0];

            // get and use the SID to log into the RFI subsystem
            var regex = new System.Text.RegularExpressions.Regex(@"(?<sid>\?SID=.*)'");

            var match = regex.Match(nodeLin.Attributes["onclick"].Value);

            this.GlobalEvocoId = match.Groups["sid"].Value;
        }
コード例 #19
0
ファイル: tools.cs プロジェクト: gorangrubic/imbNLP
        /// <summary>
        /// Builds the content tree out of <see cref="HtmlDocument"/>: every element that has a text
        /// child and no element children is added under a path derived from its XPath, provided the
        /// tag is acceptable and its processed text is not empty.
        /// </summary>
        /// <param name="htmlDoc">The HTML document</param>
        /// <param name="__name">The name of the root</param>
        /// <param name="allowTitle">Forwarded to <c>isTagAcceptable</c>.</param>
        /// <param name="allowMeta">Forwarded to <c>isTagAcceptable</c>.</param>
        /// <returns>The populated tree.</returns>
        public static nodeTree buildTree(this HtmlDocument htmlDoc, string __name, Boolean allowTitle = false, Boolean allowMeta = false)
        {
            nodeTree output = new nodeTree(__name, htmlDoc);

            XPathNodeIterator leaves = htmlDoc.CreateNavigator().Select("//*[text()][count(*)=0]");

            while (leaves.MoveNext())
            {
                HtmlNodeNavigator leafNavigator = leaves.Current as HtmlNodeNavigator;
                var leaf = leafNavigator.CurrentNode;

                // Tree paths use backslashes where the XPath uses forward slashes.
                string treePath = leaf.XPath.Replace("/", "\\");

                if (!isTagAcceptable(leaf, null, allowTitle, allowMeta))
                {
                    continue;
                }

                string content = leaf.InnerText.htmlContentProcess().Trim();

                if (content.isNullOrEmptyString())
                {
                    continue;
                }

                // The tree stores a clone, not the node from the source document.
                graphWrapNode <htmlWrapper> added = output.Add(treePath, leaf.Clone());
                added.item.content = content;
                added.item.xPath   = treePath;
                added.item.path    = added.path;
            }

            return output;
        }
コード例 #20
0
        /// <summary>
        /// Loads the inventory page at <c>url</c> and scrapes VIN, price, make, model and photo URL
        /// for the product block whose <c>data-index-position</c> is 1.
        /// </summary>
        /// <returns>A <c>ScrapeInfo</c> filled with the scraped values.</returns>
        private static ScrapeInfo useXPath()
        {
            ScrapeInfo scraped = new ScrapeInfo();

            HtmlDocument inventoryPage = new HtmlWeb().Load(url);
            var firstListing = inventoryPage.DocumentNode.SelectSingleNode("//div[contains(@class,\"hproduct\")][@data-index-position=\"1\"]");
            var listingNavigator = (HtmlNodeNavigator)firstListing.CreateNavigator();

            // Attribute lookups are relative to the product block.
            // NOTE(review): the price and photo paths start with "//", which in XPath selects from the
            // document root rather than from this block - confirm that is intended.
            scraped.Vin = listingNavigator.SelectSingleNode("@data-vin").Value;
            scraped.Price = listingNavigator.SelectSingleNode("//span[contains(@class,\"internetPrice\")]//span[@class=\"value\"]/text()").Value;
            scraped.Make = listingNavigator.SelectSingleNode("@data-make").Value;
            scraped.Model = listingNavigator.SelectSingleNode("@data-model").Value;
            scraped.PhotoUrl = listingNavigator.SelectSingleNode("//div[@class=\"media\"]//img/@src").Value;

            return scraped;
        }
コード例 #21
0
        /// <summary>
        /// Parses the text of the current node as a 32-bit integer.
        /// </summary>
        /// <param name="settings">Not used by this transformation.</param>
        /// <param name="nodeNavigator">Navigator positioned on the node holding the number; may be null.</param>
        /// <param name="logicalParents">Not used by this transformation.</param>
        /// <returns>The parsed integer (boxed), or null when there is no text or it is not a valid integer.</returns>
        public object Transform(Dictionary <string, object> settings, HtmlNodeNavigator nodeNavigator, List <HtmlAgilityPack.HtmlNode> logicalParents)
        {
            string candidate = null;

            if (nodeNavigator != null)
            {
                // Prefer the navigator value; fall back to the node's inner text.
                candidate = nodeNavigator.Value ?? nodeNavigator.CurrentNode?.InnerText;
            }

            int parsed;

            if (candidate != null && int.TryParse(candidate, out parsed))
            {
                return parsed;
            }

            return null;
        }
コード例 #22
0
        /// <summary>
        /// Measures how much sibling text surrounds the current node on the way up to a boundary
        /// ancestor.
        /// </summary>
        /// <param name="settings">
        /// Optional string <c>_startingXPath</c>: evaluated against the grandparent logical parent, its
        /// first match replaces that node as the boundary.
        /// </param>
        /// <param name="nodeNavigator">Navigator positioned on the node the walk starts from; may be null.</param>
        /// <param name="logicalParents">Logical ancestors; the second-to-last entry is the default boundary.</param>
        /// <returns>
        /// Length (boxed int) of the trimmed, space-joined text returned by <c>GetTextFromSiblings</c>
        /// for each node on the walk; 0 when fewer than two logical parents exist or the starting XPath
        /// matches nothing.
        /// </returns>
        public object Transform(Dictionary <string, object> settings, HtmlNodeNavigator nodeNavigator, List <HtmlAgilityPack.HtmlNode> logicalParents)
        {
            if (logicalParents == null || logicalParents.Count < 2)
            {
                return 0;
            }

            // The immediate parent is the list itself, so the boundary is the list's parent
            // (our grandparent).
            HtmlAgilityPack.HtmlNode boundary = logicalParents[logicalParents.Count - 2];

            if (settings != null && settings.ContainsKey("_startingXPath") && ((JValue)settings["_startingXPath"]).Type == JTokenType.String)
            {
                var startingXPath = ((JValue)settings["_startingXPath"]).ToObject <string>();
                var candidates = boundary.SelectNodes(startingXPath);

                if (candidates == null || candidates.Count == 0)
                {
                    return 0;
                }

                boundary = candidates[0];
            }

            var collected = new StringBuilder();
            var reachedBoundary = false;

            // Climb from the current node until the boundary is hit or the helper reports it was found.
            for (var walker = nodeNavigator?.CurrentNode;
                 walker != null && walker != boundary && !reachedBoundary;
                 walker = walker.ParentNode)
            {
                var siblingText = this.GetTextFromSiblings(walker, boundary, ref reachedBoundary);

                if (!string.IsNullOrEmpty(siblingText))
                {
                    collected.Append(siblingText).Append(" ");
                }
            }

            return collected.ToString().Trim().Length;
        }
コード例 #23
0
        /// <summary>
        /// Crawls the listing pages to collect product and pagination links, then visits every
        /// product link and records either a new price (known product) or a complete new product.
        /// </summary>
        /// <param name="client">Web client used for all downloads.</param>
        /// <param name="ProductList">Already known products, passed to <c>IsAlreadyParsed</c>.</param>
        /// <returns>The prices, products and pictures added during this run.</returns>
        /// <remarks>
        /// A <see cref="System.Net.WebException"/> while processing a listing page is ignored and the
        /// crawl continues; download failures for product pages are not caught here.
        /// </remarks>
        public ParsingResults Parse(System.Net.WebClient client, List <Models.Product> ProductList)
        {
            HtmlDocument doc = new HtmlDocument();

            // The count is re-read on every pass, so entries added to PagesLinks_ during the loop
            // are visited as well.
            for (int pageIndex = 0; pageIndex < PagesLinks_.Count; pageIndex++)
            {
                try
                {
                    string pageHtml = client.DownloadString(PagesLinks_[pageIndex]);

                    Console.Write(PagesLinks_[pageIndex] + " " + pageIndex.ToString() + "\n");
                    doc.LoadHtml(pageHtml);

                    var pageNavigator = (HtmlNodeNavigator)doc.CreateNavigator();
                    AddProducts(pageNavigator.Select(Rules_.DetailsXpath));
                    AddPages(pageNavigator.Select(Rules_.PaginationXpath));
                }
                catch (System.Net.WebException)
                {
                }
            }

            ParsingResults res = new ParsingResults();

            for (int productIndex = 0; productIndex < ProductsLinks_.Count; productIndex++)
            {
                string productHtml = client.DownloadString(ProductsLinks_[productIndex]);

                Console.Write(ProductsLinks_[productIndex] + " " + productIndex.ToString() + "\n");
                doc.LoadHtml(productHtml);

                var productNavigator = (HtmlNodeNavigator)doc.CreateNavigator();
                var known = IsAlreadyParsed(productHtml, ProductList);

                if (known == null)
                {
                    // Not seen before: build the product, its price and its pictures.
                    ParseNewProduct(productNavigator, known, res, ProductsLinks_[productIndex]);
                    continue;
                }

                // Known product: only record its current price.
                string priceValue = ConnectStrings(productNavigator.Select(Rules_.PriceXpath));
                var latestPrice = new Models.Price
                {
                    Product = known,
                    Date = DateTime.Now,
                    PriceValue = priceValue
                };

                known.Price = priceValue;
                res.AddedPrices.Add(latestPrice);
            }

            return res;
        }
コード例 #24
0
        /// <summary>
        /// Reads the application-review date and the auction-start date from the table cells
        /// that follow their label cells, and parses both with the "dd.MM.yyyy HH:mm" format.
        /// A missing cell or value is parsed as an empty string.
        /// </summary>
        /// <param name="navigator">Navigator over the page being read.</param>
        /// <param name="scoringDate">Receives the parsed application-review date.</param>
        /// <param name="biddingDate">Receives the parsed auction-start date.</param>
        private void FillBidAndScorDates(HtmlNodeNavigator navigator, out DateTime scoringDate,
                                         out DateTime biddingDate)
        {
            const string dateFormat = "dd.MM.yyyy HH:mm";

            // Application-review date: either of the two label spellings is accepted.
            var scoringCell = navigator.SelectSingleNode(
                "//td[contains(., 'Дата и время рассмотрения заявок:') or contains(., 'Дата рассмотрения заявок:')]/following-sibling::td");
            var scoringText = scoringCell?.Value?.Trim() ?? "";
            scoringDate = scoringText.ParseDateUn(dateFormat);

            // Auction-start date.
            var biddingCell = navigator.SelectSingleNode(
                "//td[contains(., 'Дата начала аукциона')]/following-sibling::td");
            var biddingText = biddingCell?.Value?.Trim() ?? "";
            biddingDate = biddingText.ParseDateUn(dateFormat);
        }
コード例 #25
0
        /// <summary>
        /// Splits the inner text of the current node into a <c>JArray</c> of strings.
        /// </summary>
        /// <param name="settings">
        /// Optional settings. "_separator" (JSON string) overrides the default "," separator and
        /// "_trim" (JSON boolean) enables HTML de-entitizing plus trimming of every part.
        /// Entries of any other type are ignored. Settings are only consulted when the text is
        /// not null/whitespace.
        /// </param>
        /// <param name="nodeNavigator">Navigator whose current node supplies the text.</param>
        /// <param name="logicalParents">Not used by this transformation.</param>
        /// <returns>
        /// A <c>JArray</c> with the text parts, or <c>null</c> when there is no current node,
        /// no text, or the split raises an <c>ArgumentException</c>.
        /// </returns>
        public object Transform(Dictionary <string, object> settings, HtmlNodeNavigator nodeNavigator, List <HtmlAgilityPack.HtmlNode> logicalParents)
        {
            var node = nodeNavigator?.CurrentNode;

            if (node == null)
            {
                return(null);
            }

            var text = node.InnerText;

            if (text == null)
            {
                // Nothing to split; avoids a NullReferenceException on text.Split below.
                return(null);
            }

            var separator = ",";
            var trim      = false;

            if (settings != null && !string.IsNullOrWhiteSpace(text))
            {
                object rawSetting;

                // "as" instead of a hard cast: a setting that is not a JValue (or is null)
                // is ignored rather than throwing.
                if (settings.TryGetValue("_separator", out rawSetting))
                {
                    var separatorValue = rawSetting as JValue;

                    if (separatorValue != null && separatorValue.Type == JTokenType.String)
                    {
                        separator = separatorValue.ToString();
                    }
                }

                if (settings.TryGetValue("_trim", out rawSetting))
                {
                    var trimValue = rawSetting as JValue;

                    if (trimValue != null && trimValue.Type == JTokenType.Boolean)
                    {
                        trim = (bool)trimValue.Value;
                    }
                }
            }

            try
            {
                var textParts = text.Split(new string[] { separator }, StringSplitOptions.None);

                if (trim)
                {
                    for (var i = 0; i < textParts.Length; i++)
                    {
                        textParts[i] = HtmlEntity.DeEntitize(textParts[i]).Trim();
                    }
                }

                return(new JArray(textParts));
            }
            catch (ArgumentException)
            {
                // Kept from the original: a failed split yields null rather than an exception.
            }

            return(null);
        }
コード例 #26
0
        /// <summary>
        /// Reads the OKPD2 category and the quantity from the page and inserts one row into
        /// the <c>purchase_object</c> table for the given lot.
        /// </summary>
        /// <param name="connect">Connection the insert is executed on.</param>
        /// <param name="customerId">Value stored in <c>id_customer</c>.</param>
        /// <param name="nav">Navigator over the page being read.</param>
        /// <param name="idLot">Value stored in <c>id_lot</c>.</param>
        /// <param name="lotName">Value stored in <c>name</c>.</param>
        /// <param name="sum">Value stored in <c>sum</c>.</param>
        private void AddPurObjectFirst(MySqlConnection connect, int customerId, HtmlNodeNavigator nav, int idLot,
                                       string lotName, string sum)
        {
            // The code is the bold element inside the category cell; the name is the text of
            // the whole div, which also contains that code.
            var okpd2 = nav.SelectSingleNode(
                "//td[contains(., 'Категория ОКПД2:')]/following-sibling::td/div/b")
                        ?.Value?.Trim() ??
                        "";
            var okpdName = nav.SelectSingleNode(
                "//td[contains(., 'Категория ОКПД2:')]/following-sibling::td/div")
                           ?.Value?.ReplaceHtmlEntyty().Trim() ??
                           "";

            if (!string.IsNullOrEmpty(okpd2))
            {
                // Strip the code out of the div text so only the name remains.
                okpdName = okpdName.Replace(okpd2, "");
            }

            if (okpdName.Contains("Показать все"))
            {
                // Text containing the "Показать все" ("Show all") marker is discarded.
                okpdName = "";
            }

            var quantity = nav.SelectSingleNode(
                "//td[contains(., 'Количество:')]/following-sibling::td")
                           ?.Value?.Trim().ExtractPrice() ??
                           "";
            var insertLotitem =
                $"INSERT INTO {AppBuilder.Prefix}purchase_object SET id_lot = @id_lot, id_customer = @id_customer, name = @name, sum = @sum, okpd2_code = @okpd2_code, okpd2_group_code = @okpd2_group_code, okpd2_group_level1_code = @okpd2_group_level1_code, okpd_name = @okpd_name, quantity_value = @quantity_value, customer_quantity_value = @customer_quantity_value, okei = @okei, price = @price";

            // The command is disposable; release it as soon as the insert has run.
            using (var cmd19 = new MySqlCommand(insertLotitem, connect))
            {
                cmd19.Prepare();
                cmd19.Parameters.AddWithValue("@id_lot", idLot);
                cmd19.Parameters.AddWithValue("@id_customer", customerId);
                cmd19.Parameters.AddWithValue("@name", lotName);
                cmd19.Parameters.AddWithValue("@sum", sum);
                cmd19.Parameters.AddWithValue("@okpd2_code", okpd2);
                cmd19.Parameters.AddWithValue("@okpd2_group_code", "");
                cmd19.Parameters.AddWithValue("@okpd2_group_level1_code", "");
                cmd19.Parameters.AddWithValue("@okpd_name", okpdName);
                cmd19.Parameters.AddWithValue("@quantity_value", quantity);
                // The same quantity is written to both quantity columns.
                cmd19.Parameters.AddWithValue("@customer_quantity_value", quantity);
                cmd19.Parameters.AddWithValue("@okei", "");
                cmd19.Parameters.AddWithValue("@price", "");
                cmd19.ExecuteNonQuery();
            }
        }
        /// <summary>
        /// Returns the inner text of the current node with HTML entities decoded, the ends
        /// trimmed and every match of <c>ExtraWhitespacesRegex</c> replaced by a single space.
        /// </summary>
        /// <param name="settings">Not used by this transformation.</param>
        /// <param name="nodeNavigator">Navigator whose current node supplies the text.</param>
        /// <param name="logicalParents">Not used by this transformation.</param>
        /// <returns>The cleaned text, or <c>null</c> when there is no current node or no text.</returns>
        public object Transform(Dictionary <string, object> settings, HtmlNodeNavigator nodeNavigator, List <HtmlAgilityPack.HtmlNode> logicalParents)
        {
            var rawText = nodeNavigator?.CurrentNode?.InnerText;

            if (rawText == null)
            {
                return(null);
            }

            // Decode and trim first, then normalise whitespace inside the text.
            var decoded = HtmlEntity.DeEntitize(rawText).Trim();

            return(ExtraWhitespacesRegex.Replace(decoded, " "));
        }
コード例 #28
0
ファイル: Form1.cs プロジェクト: wojprz/Pobieranie
        //private void ProgressChanged(object sender, DownloadProgressChangedEventArgs e)
        //{
        //    progressBar.Value = e.ProgressPercentage;
        //}
        //private void click_Click(object sender, EventArgs e)
        //{
        //    WebClient webClient = new WebClient();
        //    webClient.DownloadFileCompleted += new AsyncCompletedEventHandler(DownloadCompleted);
        //    webClient.DownloadProgressChanged += new DownloadProgressChangedEventHandler(ProgressChanged);

        //    string sourceFile = $"http://openload.co/stream/Ki5y8-mPcoE~1558022792~95.108.0.0~mdmeUiAb?";
        //    webClient.DownloadFileAsync(new Uri(sourceFile), "test.mp4");
        //}
        //private void DownloadCompleted(object sender, AsyncCompletedEventArgs e)
        //{
        //    MessageBox.Show("The download is completed!");
        //}

        /// <summary>
        /// Downloads the page whose address is typed into the <c>adres</c> box, looks up the
        /// element with id "olvideo_html5_api" and writes that element's value back into the box.
        /// When the page contains no such element the box is left unchanged.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void button1_Click(object sender, EventArgs e)
        {
            using (var client = new WebClient())
            {
                string page = adres.Text;
                string html = client.DownloadString(page);

                HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
                doc.LoadHtml(html);

                HtmlNodeNavigator navigator = (HtmlNodeNavigator)doc.CreateNavigator();
                string            xPath     = "//*[@id=\"olvideo_html5_api\"]";

                // SelectSingleNode yields null when nothing matches; dereferencing it directly
                // would throw a NullReferenceException out of the click handler.
                var match = navigator.SelectSingleNode(xPath);

                if (match == null)
                {
                    return;
                }

                adres.Text = match.Value;
            }
        }
コード例 #29
0
        /// <summary>
        /// Parses <paramref name="html"/> and evaluates <paramref name="xpath"/> against the
        /// document root, returning the value and XPath of every matched node.
        /// </summary>
        /// <param name="html">HTML markup to load.</param>
        /// <param name="xpath">XPath expression selecting the nodes of interest.</param>
        /// <returns>One <c>ResultEntity</c> per match, in iteration order; empty when nothing matches.</returns>
        private List <ResultEntity> GetNodeValue(string html, string xpath)
        {
            var results  = new List <ResultEntity>();
            var document = new HtmlAgilityPack.HtmlDocument();

            document.LoadHtml(html);

            var rootNavigator = (HtmlNodeNavigator)document.DocumentNode.CreateNavigator();

            foreach (HtmlNodeNavigator match in rootNavigator.Select(xpath))
            {
                var entity = new ResultEntity();

                entity.Value = match.Value;
                entity.Path  = match.CurrentNode.XPath;
                results.Add(entity);
            }

            return(results);
        }
コード例 #30
0
        /// <summary>
        /// Calls <c>AddLot</c> for every "Лот №" link found on the page and falls back to
        /// <c>AddOneLot</c> when the page lists no lots or none of them was added.
        /// </summary>
        /// <param name="htmlDoc">Document searched for lot links.</param>
        /// <param name="navigator">Navigator forwarded to <c>AddLot</c> / <c>AddOneLot</c>.</param>
        /// <param name="connect">Connection forwarded to <c>AddLot</c> / <c>AddOneLot</c>.</param>
        /// <param name="idTender">Tender id forwarded to <c>AddLot</c> / <c>AddOneLot</c>.</param>
        /// <param name="customerId">Customer id forwarded to <c>AddLot</c> / <c>AddOneLot</c>.</param>
        private void AddLots(HtmlDocument htmlDoc, HtmlNodeNavigator navigator, MySqlConnection connect,
                             int idTender,
                             int customerId)
        {
            var lotAdded = false;

            // SelectNodes returns null when nothing matches; substitute an empty collection.
            var lots     = htmlDoc.DocumentNode.SelectNodes(
                "//div[@class = 'expandable-text short']//a[contains(., 'Лот №')]") ??
                           new HtmlNodeCollection(null);

            foreach (var lot in lots)
            {
                AddLot(navigator, connect, idTender, customerId, lot, out var lotWasAdded);

                // Accumulate instead of overwrite: previously only the LAST lot's result
                // survived, so the fallback below ran even though an earlier lot was added.
                lotAdded |= lotWasAdded;
            }

            if (lots.Count < 1 || !lotAdded)
            {
                AddOneLot(navigator, connect, idTender, customerId);
            }
        }