/// <summary>
/// Extracts all product URLs listed on a single pagination (search-results) page.
/// </summary>
/// <param name="pagination_uri">Absolute URL of one search-results page.</param>
/// <returns>
/// Absolute product URLs, each built from <c>SigmaAldrichConstants.SigmaAldrichMain</c>
/// plus the relative link extracted from the page.
/// </returns>
public static List<string> GetProductsUri(string pagination_uri)
{
    List<string> productsLinks = new List<string>();

    HtmlDocument dataPage = DataGetter.GetHtmlpage(new Uri(pagination_uri));

    // Each product entry is an <li class='productNumberValue'> containing an anchor.
    List<string> elements = new List<string>();
    elements.Add("//li[@class='productNumberValue']");
    List<KeyValuePair<string, HtmlNodeCollection>> data = DataGetter.GetDataByXPATH(dataPage, elements);
    HtmlNodeCollection productsNodes = data[0].Value;

    for (int j = 0; j < productsNodes.Count; j++)
    {
        // Pull the relative href out of the node's markup and prepend the site root.
        string extractedLink = ExtractLinkFromHtml(productsNodes[j], "href=", "\">");
        productsLinks.Add(SigmaAldrichConstants.SigmaAldrichMain + "/" + extractedLink);
    }

    // Scrape throttling: pause a random 5-15 seconds so repeated page fetches
    // by the caller do not hammer the server.
    System.Threading.Thread.Sleep((int)DataGetter.GetRandomNumber(5.0, 15.0) * 1000);

    return productsLinks;
}
/// <summary>
/// Downloads a single product page and parses it into a <c>Product</c>
/// (description sections, detail properties, and product name).
/// </summary>
/// <param name="product_uri">Absolute URL of the product page.</param>
/// <returns>A populated <c>Product</c> instance.</returns>
public static Product GetProduct(string product_uri)
{
    Product p = new Product();

    // Section headers expected inside the description block; the parser
    // splits the description text on these headings.
    List<string> headers = new List<string>
    {
        "Components",
        "Application",
        "Features and Benefits",
        "General description",
        "Packaging",
        "Reconstitution",
        "Other Notes",
        "Legal Information",
        "Caution",
        "Biochem/physiol Actions",
        "Preparation Note",
    };

    List<string> elements = new List<string> { "//div[@class='descriptionContent']" };

    HtmlDocument dataPage = DataGetter.GetHtmlpage(new Uri(product_uri));
    List<KeyValuePair<string, HtmlNodeCollection>> dataDescription = DataGetter.GetDataByXPATH(dataPage, elements);

    p.Description = SigmaAldrichParser.ParseDescription(dataDescription, headers);
    p.Properties = SigmaAldrichParser.ParseDetailProperties(dataPage.DocumentNode);

    List<string> elements1 = new List<string> { "//p[@class='product-name']" };
    p.Name = GetProductName(dataPage, elements1);

    return p;
}
//string ProductName = GetProductName(uri, elements);
/// <summary>
/// Fetches the page at <paramref name="uri"/> and extracts specifications and
/// product URIs from it. Most of the original extraction steps are commented out.
/// </summary>
/// <param name="uri">Absolute URL of the page to parse.</param>
public static void Parse(Uri uri)
{
    HtmlDocument entirePage = DataGetter.GetHtmlpage(uri);
    //string productName = GetProductName(entirePage, "//h2[@class='product-name']");
    //string description = GetDescription(entirePage, "//meta[@name='description']");
    //specifications
    // NOTE(review): 'Specifications' is not declared here — presumably a field of the
    // enclosing class that this call populates; verify its declaration elsewhere in the file.
    Specifications = GetSpecifications(entirePage, "//td[@class='data']");
    // NOTE(review): productUris is computed but never used or returned — looks like
    // either dead code or an unfinished step; confirm intent before removing.
    List <string> productUris = GetProductUris(entirePage, "//li");
}
/// <summary>
/// Extracts product URLs from every pagination page in <paramref name="pagination_uri"/>.
/// </summary>
/// <param name="pagination_uri">Absolute URLs of the search-results pages to scrape.</param>
/// <param name="num_pages_to_get">
/// Currently unused; retained for interface compatibility. The commented-out logic
/// that once capped the number of products was disabled, so all pages are processed.
/// </param>
/// <returns>Absolute product URLs aggregated across all pages, in page order.</returns>
public static List<string> GetAllProductsUri(List<string> pagination_uri, int num_pages_to_get)
{
    List<string> productsLinks = new List<string>();

    // Delegate the per-page work (XPath query on 'productNumberValue' items,
    // link extraction, and the random 5-15 second anti-scraping delay) to
    // GetProductsUri, which performs the identical steps for a single page.
    for (int i = 0; i < pagination_uri.Count; i++)
    {
        productsLinks.AddRange(GetProductsUri(pagination_uri[i]));
    }

    return productsLinks;
}
/// <summary>
/// Builds the list of pagination URLs for a category by inferring the URL pattern
/// from the category page's "page 2" link and the last-page link.
/// </summary>
/// <param name="category_url">Absolute URL of the category (first results) page.</param>
/// <param name="pages_to_get">Number of pages to generate links for; 0 means all pages
/// up to the last page number found in the pagination widget.</param>
/// <returns>Absolute pagination URLs for pages 1..N.</returns>
public static List <string> GetCategoryPaginationUrls(string category_url, int pages_to_get)
{
    int numPagesToGet = pages_to_get;
    HtmlDocument doc = DataGetter.GetHtmlpage(new Uri(category_url));

    //infer page name from 2nd page link
    // Locate the pagination container <div id='searchResultsPagination'>.
    List <string> liElements = new List <string>();
    liElements.Add("searchResultsPagination");
    List <KeyValuePair <string, HtmlNode> > paginationElements = DataGetter.GetDataByID(doc, "div", liElements);

    // The <li id='pg2'> element holds the link to page 2, which carries the
    // full URL template including the "page=" query parameter.
    List <string> liElements1 = new List <string>();
    liElements1.Add("pg2");
    List <KeyValuePair <string, HtmlNode> > page2 = DataGetter.GetDataByID(doc, "li", liElements1);
    string page2Link = ExtractLinkFromHtml(page2[0].Value, "href", "'>");

    // Walk into the last pagination container's <ul>; the second-to-last child
    // is assumed to be the "last page" item — TODO confirm against live markup.
    HtmlNodeCollection divNodeChildren = paginationElements[paginationElements.Count - 1].Value.ChildNodes;
    HtmlNode divNodeChildren1 = divNodeChildren.First(x => x.Name == "ul");
    HtmlNode lastPage = divNodeChildren1.ChildNodes[divNodeChildren1.ChildNodes.Count - 2];
    string lastPageLink = ExtractLinkFromHtml(lastPage, "href", "'>");
    int lastPageNumber = ExtractNumberFromString(lastPageLink, "page=");

    //Now build the pagination links...
    // Split the page-2 link around "page=": pageParts[0] is everything before the
    // page number, pageParts[1] is the number plus the URL tail.
    string[] pageParts = page2Link.Split(new string[] { "page=" }, StringSplitOptions.None);
    //pageParts[0] = pageParts[0].Substring(1);
    // Strip the leading page number so pageParts[1] is only the URL tail.
    pageParts[1] = RemoveNumberFromStartOfString(pageParts[1]);
    //pageParts[1] = pageParts[1].Substring(0, pageParts[1].Length - 1);

    List <string> paginationLinks = new List <string>();
    if (numPagesToGet == 0)
    {
        //get all products
        numPagesToGet = lastPageNumber;
    }
    // Reassemble: root + prefix + "page=<n>" + tail, for pages 1..numPagesToGet.
    for (int i = 0; i < numPagesToGet; i++)
    {
        paginationLinks.Add(SigmaAldrichConstants.SigmaAldrichMain + pageParts[0] + "page=" + (i + 1) + pageParts[1]);
    }
    return(paginationLinks);
}