/// <summary>
/// Loads the HTML for the given result page and converts the rows of the
/// element with class <c>grid</c> into <see cref="MedicineRaw"/> items.
/// </summary>
/// <param name="pagina">Page number passed to <c>GetResponseTextWithCache</c>.</param>
/// <returns>One <see cref="MedicineRaw"/> per data row found on the page.</returns>
/// <remarks>
/// The response is re-requested until the grid contains more than two rows.
/// NOTE(review): the retry loop is unbounded; if <c>GetResponseTextWithCache</c>
/// keeps returning the same unusable response this method never returns - confirm
/// that the cache does not store bad responses.
/// </remarks>
private List<MedicineRaw> GetItems(int pagina)
        {
            HtmlNodeCollection trs;

            while (true)
            {
                var    document     = new HtmlDocument();
                string responseText = this.GetResponseTextWithCache(pagina);
                document.LoadHtml(responseText);

                trs = document.DocumentNode.SelectNodes("//*[@class='grid']/tr");

                // SelectNodes returns null (not an empty collection) when nothing
                // matches, so a response without a grid must be treated like a
                // too-short grid and retried instead of crashing on trs.Count.
                if (trs != null && trs.Count > 2)
                {
                    break;
                }
            }

            // Drop the first and the last row (presumably header and pager rows -
            // TODO confirm); trs.Count > 2 is guaranteed by the loop above.
            HtmlNode[] rows = trs.Skip(1).Take(trs.Count - 2).ToArray();

            var medicines = new List<MedicineRaw>(rows.Length);

            foreach (HtmlNode tr in rows)
            {
                // The second cell holds "Name (Laboratory)"; read it once and
                // split it with the two patterns below.
                string nameAndLaboratory = tr.ChildNodes[1].InnerText;

                var medicine = new MedicineRaw
                {
                    ActiveIngredient = tr.ChildNodes[0].InnerText.Trim(),
                    Name             = Regex.Match(nameAndLaboratory, @"[^\(]*").Value.Trim(),
                    Laboratory       = Regex.Match(nameAndLaboratory, @"\((.*)\)").Groups[1].Value.Trim(),
                    Concentration    = tr.ChildNodes[2].InnerText.Trim(),
                    LeafletType      = tr.ChildNodes[3].InnerText.Trim(),
                    Category         = tr.ChildNodes[4].InnerText.Trim(),
                    LeafletUrl       = tr.ChildNodes[5].ChildNodes[0].Attributes["href"].Value.Trim(),
                    // TODO: the approval date used to be read from tr.ChildNodes[6]
                    // (parsed with the pt-BR culture); that data has moved to a
                    // child page. Decide whether it is still needed.
                };

                medicines.Add(medicine);
            }

            Console.WriteLine("got page {0}, with {1} items", pagina, rows.Length);

            return medicines;
        }
예제 #2
0
        /// <summary>
        /// Fetches the HTML of one result page and maps each data row of the
        /// <c>grid</c>-classed element to a <see cref="MedicineRaw"/>.
        /// </summary>
        /// <param name="pagina">Page number handed to <c>GetResponseTextWithCache</c>.</param>
        /// <returns>The medicines parsed from the page, one per data row.</returns>
        /// <remarks>
        /// The page is requested again until its grid has more than two rows.
        /// NOTE(review): there is no retry limit; verify that
        /// <c>GetResponseTextWithCache</c> cannot keep serving an unusable response.
        /// </remarks>
        private List<MedicineRaw> GetItems(int pagina)
        {
            HtmlNodeCollection trs;
            while (true)
            {
                var document = new HtmlDocument();
                string responseText = this.GetResponseTextWithCache(pagina);
                document.LoadHtml(responseText);

                trs = document.DocumentNode.SelectNodes("//*[@class='grid']/tr");

                // HtmlAgilityPack yields null rather than an empty collection when
                // the XPath matches nothing; retry in that case instead of
                // dereferencing null.
                if (trs != null && trs.Count > 2)
                {
                    break;
                }
            }

            // Keep everything except the first and last row (presumably header and
            // pager - TODO confirm). The loop above guarantees trs.Count > 2.
            HtmlNode[] dataRows = trs.Skip(1).Take(trs.Count - 2).ToArray();

            var medicines = new List<MedicineRaw>(dataRows.Length);
            foreach (HtmlNode tr in dataRows)
            {
                // Cell 1 is formatted as "Name (Laboratory)"; fetch its text once.
                string nameCellText = tr.ChildNodes[1].InnerText;

                var medicine = new MedicineRaw
                    {
                        ActiveIngredient = tr.ChildNodes[0].InnerText.Trim(),
                        Name = Regex.Match(nameCellText, @"[^\(]*").Value.Trim(),
                        Laboratory = Regex.Match(nameCellText, @"\((.*)\)").Groups[1].Value.Trim(),
                        Concentration = tr.ChildNodes[2].InnerText.Trim(),
                        LeafletType = tr.ChildNodes[3].InnerText.Trim(),
                        Category = tr.ChildNodes[4].InnerText.Trim(),
                        LeafletUrl = tr.ChildNodes[5].ChildNodes[0].Attributes["href"].Value.Trim(),
                        // TODO: the approval date was previously parsed from
                        // tr.ChildNodes[6] using the pt-BR culture; it now lives on
                        // a child page. Decide whether it is still required.
                    };

                medicines.Add(medicine);
            }

            Console.WriteLine("got page {0}, with {1} items", pagina, dataRows.Length);

            return medicines;
        }