Пример #1
0
        /// <summary>
        /// Loads a single listing page and collects the product links found on it.
        /// </summary>
        /// <param name="pagination_uri">Absolute URI of the listing page; it is wrapped in a <see cref="Uri"/> before the download.</param>
        /// <returns>
        /// One entry per node matched by <c>//li[@class='productNumberValue']</c>, each built as
        /// <c>SigmaAldrichConstants.SigmaAldrichMain + "/" + extractedLink</c>. Empty when nothing matches.
        /// </returns>
        /// <remarks>
        /// Blocks the calling thread with <c>Thread.Sleep</c> after the page has been processed,
        /// presumably to throttle requests to the remote site.
        /// </remarks>
        public static List <string> GetProductsUri(string pagination_uri)
        {
            List <string> productsLinks = new List <string>();

            HtmlDocument dataPage = DataGetter.GetHtmlpage(new Uri(pagination_uri));

            // Single XPath query: the list items that carry the product number / link.
            List <string> elements = new List <string>();
            elements.Add("//li[@class='productNumberValue']");
            List <KeyValuePair <string, HtmlNodeCollection> > data = DataGetter.GetDataByXPATH(dataPage, elements);

            // The node collection can be missing when the XPath matches nothing
            // (HtmlAgilityPack-style SelectNodes yields null in that case), so treat
            // "no result" as "no products" instead of dereferencing null.
            HtmlNodeCollection productsNodes = (data != null && data.Count > 0) ? data[0].Value : null;

            if (productsNodes != null)
            {
                for (int j = 0; j < productsNodes.Count; j++)
                {
                    // Extract the text between the "href=" marker and the closing "\">" of the anchor.
                    string extractedLink = ExtractLinkFromHtml(productsNodes[j], "href=", "\">");
                    productsLinks.Add(SigmaAldrichConstants.SigmaAldrichMain + "/" + extractedLink);
                }
            }

            // NOTE: the (int) cast binds tighter than the multiplication, so the random value is
            // converted to int first and only then scaled by 1000 (presumably seconds -> milliseconds).
            System.Threading.Thread.Sleep((int)DataGetter.GetRandomNumber(5.0, 15.0) * 1000);

            return(productsLinks);
        }
Пример #2
0
        /// <summary>
        /// Walks every listing page in <paramref name="pagination_uri"/> and collects the product links found on them.
        /// </summary>
        /// <param name="pagination_uri">Absolute URIs of the listing pages to download, processed in order.</param>
        /// <param name="num_pages_to_get">
        /// Currently ignored: every page in <paramref name="pagination_uri"/> is processed.
        /// Kept so existing callers continue to compile.
        /// </param>
        /// <returns>
        /// One entry per node matched by <c>//li[@class='productNumberValue']</c> across all pages, each built as
        /// <c>SigmaAldrichConstants.SigmaAldrichMain + "/" + extractedLink</c>. Pages without matches contribute nothing.
        /// </returns>
        /// <remarks>
        /// Blocks the calling thread with <c>Thread.Sleep</c> after each page (including the last one),
        /// presumably to throttle requests to the remote site.
        /// </remarks>
        public static List <string> GetAllProductsUri(List <string> pagination_uri, int num_pages_to_get)
        {
            List <string> productsLinks = new List <string>();

            for (int i = 0; i < pagination_uri.Count; i++)
            {
                HtmlDocument dataPage = DataGetter.GetHtmlpage(new Uri(pagination_uri[i]));

                // Single XPath query: the list items that carry the product number / link.
                List <string> elements = new List <string>();
                elements.Add("//li[@class='productNumberValue']");
                List <KeyValuePair <string, HtmlNodeCollection> > data = DataGetter.GetDataByXPATH(dataPage, elements);

                // The node collection can be missing when the XPath matches nothing
                // (HtmlAgilityPack-style SelectNodes yields null in that case); skip such
                // pages rather than aborting the whole crawl with a null dereference.
                HtmlNodeCollection productsNodes = (data != null && data.Count > 0) ? data[0].Value : null;

                if (productsNodes != null)
                {
                    for (int j = 0; j < productsNodes.Count; j++)
                    {
                        // Extract the text between the "href=" marker and the closing "\">" of the anchor.
                        string extractedLink = ExtractLinkFromHtml(productsNodes[j], "href=", "\">");
                        productsLinks.Add(SigmaAldrichConstants.SigmaAldrichMain + "/" + extractedLink);
                    }
                }

                // NOTE: the (int) cast binds tighter than the multiplication, so the random value is
                // converted to int first and only then scaled by 1000 (presumably seconds -> milliseconds).
                System.Threading.Thread.Sleep((int)DataGetter.GetRandomNumber(5.0, 15.0) * 1000);
            }

            return(productsLinks);
        }
Пример #3
0
        /// <summary>
        /// Collects the product URIs of a category and downloads and parses the description of each product.
        /// </summary>
        /// <param name="category">Category identifier, resolved to a URI by <c>GetCategoryUri</c>.</param>
        /// <param name="num_products_to_get">
        /// Maximum number of products to fetch. <c>0</c> means "all products found". Values larger than the
        /// number of discovered product URIs are clamped to that number; negative values fetch nothing.
        /// </param>
        /// <param name="pages_to_get">Forwarded to <c>GetCategoryPaginationUrls</c> to limit the listing pages.</param>
        /// <returns>One parsed <c>Description</c> per fetched product, in the order the URIs were discovered.</returns>
        /// <remarks>
        /// Blocks the calling thread with <c>Thread.Sleep</c> after each product request,
        /// presumably to throttle requests to the remote site.
        /// </remarks>
        public static List <Description> GetAllProductsForCategory(string category, int num_products_to_get, int pages_to_get)
        {
            string categoryUri = GetCategoryUri(category);

            List <string> paginationUri = GetCategoryPaginationUrls(categoryUri, pages_to_get);

            // num_products_to_get is forwarded unchanged as the second argument, as before.
            List <string> productsUris = GetAllProductsUri(paginationUri, num_products_to_get);

            List <Description> products = new List <Description>();

            // Section headers handed to the description parser.
            List <string> headers = new List <string>
            {
                "Components",
                "Application",
                "Features and Benefits",
                "General description",
                "Packaging",
                "Reconstitution",
                "Other Notes",
                "Legal Information",
                "Caution",
                "Biochem/physiol Actions",
                "Preparation Note"
            };

            List <string> elements = new List <string>();
            elements.Add("//div[@class='descriptionContent']");

            // Effective number of products to fetch: 0 means "everything that was found".
            // The loop below must be bounded by this value (not by the raw parameter),
            // otherwise a request for 0 fetches nothing at all.
            int numProductsToGet = (num_products_to_get == 0) ? productsUris.Count : num_products_to_get;

            // Never index past the URIs that were actually discovered.
            if (numProductsToGet > productsUris.Count)
            {
                numProductsToGet = productsUris.Count;
            }

            for (int i = 0; i < numProductsToGet; i++)
            {
                List <KeyValuePair <string, HtmlNodeCollection> > dataDescription = DataGetter.GetDataByXPATH(new Uri(productsUris[i]), elements);
                Description description = SigmaAldrichParser.ParseDescription(dataDescription, headers);

                products.Add(description);

                // NOTE: the (int) cast binds tighter than the multiplication, so the random value is
                // converted to int first and only then scaled by 1000 (presumably seconds -> milliseconds).
                System.Threading.Thread.Sleep((int)DataGetter.GetRandomNumber(5.0, 15.0) * 1000);
            }

            return(products);
        }