/// <summary>
        /// Loads the saved "last page" category HTML, parses it, round-trips the
        /// parser output through a JSON file and checks the first item as well as
        /// the absence of a next-page link.
        /// </summary>
        public void parseDocument_lastPage_parseOl()
        {
            // Prepare
            dom.Load(@"..\..\..\..\resources\category_page_last.html");
            parser = new AliExpressParser(dom);
            List<AliExpressItem> aliList;

            // Act
            parser.ParseDocument();

            try
            {
                // FileMode.Create overwrites a file left behind by an earlier aborted
                // run; CreateNew would throw IOException in that situation.
                using (var stream = new FileStream(outputFile, FileMode.Create))
                {
                    parser.WriteOutput(stream);
                }

                var output = File.ReadAllText(outputFile);

                aliList = JsonConvert.DeserializeObject<List<AliExpressItem>>(output);
            }
            finally
            {
                // Clean up even when writing, reading or deserializing throws, so a
                // failure here cannot poison subsequent test runs.
                if (File.Exists(outputFile))
                {
                    File.Delete(outputFile);
                }
            }

            // Assert
            // Fail with a readable message instead of an exception from First().
            Assert.That(aliList, Is.Not.Null.And.Not.Empty);

            var firstItem = aliList.First();

            Assert.That(firstItem.Code, Is.EqualTo("32217250589"));
            Assert.That(firstItem.Name, Is.EqualTo("Luxo Caso De Couro Real Para LG Optimus G3 D850 D855 Telefone Tampa Traseira Fique Livro Estilo Com Suporte de Cartão"));
            Assert.That(firstItem.Price, Is.EqualTo("US $3.78"));
            Assert.That(parser.NextPage, Is.Null);
        }
        /// <summary>
        /// Parses the document currently held in <c>dom</c>, round-trips the parser
        /// output through a JSON file and checks the first deserialized item.
        /// </summary>
        public void parseDocument_validDocument_parseOk()
        {
            // Prepare
            parser = new AliExpressParser(dom);
            List<AliExpressItem> aliList;

            // Act
            parser.ParseDocument();

            try
            {
                // FileMode.Create overwrites a file left behind by an earlier aborted
                // run; CreateNew would throw IOException in that situation.
                using (var stream = new FileStream(outputFile, FileMode.Create))
                {
                    parser.WriteOutput(stream);
                }

                var output = File.ReadAllText(outputFile);

                aliList = JsonConvert.DeserializeObject<List<AliExpressItem>>(output);
            }
            finally
            {
                // Clean up even when writing, reading or deserializing throws, so a
                // failure here cannot poison subsequent test runs.
                if (File.Exists(outputFile))
                {
                    File.Delete(outputFile);
                }
            }

            // Assert
            // Fail with a readable message instead of an exception from First().
            Assert.That(aliList, Is.Not.Null.And.Not.Empty);

            var firstItem = aliList.First();

            Assert.That(firstItem.Code, Is.EqualTo("32710766482"));
            Assert.That(firstItem.Name, Is.EqualTo("WolfRule Handbag Cover Flip PU Leather Silicone Wallet Phone Case Pro Case For Doogee"));
            Assert.That(firstItem.Price, Is.EqualTo("US $2.64"));
        }
        // Example #3
        // 0
        /// <summary>
        /// Console entry point: asks for an AliExpress URL and a page count, then
        /// parses page after page and writes each one as a JSON file into the
        /// directory named by the "OutputPath" application setting.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            string outputPath = ConfigurationManager.AppSettings.Get("OutputPath");

            Console.WriteLine("Digit the AliExpress URL that you want to scrap data:");
            var url = Console.ReadLine();

            var scraper = new WebScraper(url);

            int num_pages = 0;

            do
            {
                Console.WriteLine("How many pages do you want to scrap?");

                var input = Console.ReadLine();

                if (input == null)
                {
                    // Standard input was closed, so no answer can ever arrive;
                    // re-prompting would spin forever.
                    return;
                }

                // TryParse avoids using exceptions for expected bad input. Zero and
                // negative counts are rejected too, since they cannot drive the loop below.
                if (!Int32.TryParse(input, out num_pages) || num_pages < 1)
                {
                    Console.WriteLine("Invalid input value. Try to use a valid number.");
                    num_pages = 0;
                }
            } while (num_pages == 0);

            var parser = new AliExpressParser(scraper.LoadPage());

            for (int page_counter = 1; page_counter <= num_pages; page_counter++)
            {
                parser.ParseDocument();

                var outputFile = Path.Combine(outputPath, $"{parser.Category.ToLower().Replace(" ", String.Empty)}_page_{page_counter}.json");

                // FileMode.Create replaces an existing file, which covers the former
                // Exists/Delete/CreateNew sequence in a single step.
                using (var stream = new FileStream(outputFile, FileMode.Create))
                {
                    Console.WriteLine($"Saving page {page_counter} into {outputFile}");
                    parser.WriteOutput(stream);
                }

                // Stop when the requested number of pages has been saved (no point in
                // loading one more page) or when the site offers no further page.
                // Without the second check the loop would re-parse the same page forever.
                if (page_counter == num_pages || String.IsNullOrEmpty(parser.NextPage))
                {
                    break;
                }

                scraper = new WebScraper(parser.NextPage);
                parser  = new AliExpressParser(scraper.LoadPage());
            }
        }
        /// <summary>
        /// Scrapes up to 10, 50 or 100 pages (per the form's radio buttons) starting
        /// at the URL entered in the form, writing one JSON file per page into the
        /// output directory entered in the form. The work runs via <c>Task.Run</c>.
        /// </summary>
        /// <param name="form">Form that supplies the start URL, page limit and output directory.</param>
        /// <returns>A task that yields 1 once the download loop has finished.</returns>
        private async Task<int> Download(AliExpressScraperForm form)
        {
            // Read the form's values before handing off to Task.Run so the lambda
            // does not touch the controls itself.
            // NOTE(review): assumes Download is invoked on the thread that owns the form - confirm.
            var startUrl = form.AliURLText.Text;
            var outputDir = form.OutpurDirText.Text;
            var num_pages = form.PagesRadio10.Checked ? 10 : (form.PagesRadio50.Checked ? 50 : 100);

            return(await Task.Run(() =>
            {
                var scraper = new WebScraper(startUrl);
                var parser = new AliExpressParser(scraper.LoadPage());

                for (int page_counter = 1; page_counter <= num_pages; page_counter++)
                {
                    parser.ParseDocument();

                    var outputFile = Path.Combine(outputDir, $"{parser.Category.ToLower().Replace(" ", String.Empty)}_page_{page_counter}.json");

                    // FileMode.Create replaces an existing file, which covers the former
                    // Exists/Delete/CreateNew sequence in a single step.
                    using (var stream = new FileStream(outputFile, FileMode.Create))
                    {
                        LogText($"Saving page {page_counter} into {outputFile}\n");
                        parser.WriteOutput(stream);
                    }

                    // Stop at the page limit (no need to load a page that will not be
                    // saved) or when there is no next page. Previously the same parser
                    // was re-parsed and re-written under increasing page numbers until
                    // the limit was reached.
                    if (page_counter == num_pages || String.IsNullOrEmpty(parser.NextPage))
                    {
                        break;
                    }

                    scraper = new WebScraper(parser.NextPage);
                    parser = new AliExpressParser(scraper.LoadPage());
                }

                return 1;
            }));
        }