// Loads the "last category page" HTML fixture, parses it, round-trips the parser output
// through a JSON scratch file (outputFile) and checks the first item and that no next
// page is reported.
public void parseDocument_lastPage_parseOl()
{
    // Prepare
    dom.Load(@"..\..\..\..\resources\category_page_last.html");
    parser = new AliExpressParser(dom);
    List<AliExpressItem> aliList;

    // Act
    parser.ParseDocument();
    try
    {
        // FileMode.Create overwrites a stale scratch file left by an aborted run
        // instead of failing the way FileMode.CreateNew would.
        using (var stream = new FileStream(outputFile, FileMode.Create))
        {
            parser.WriteOutput(stream);
        }

        string output = File.ReadAllText(outputFile);
        aliList = JsonConvert.DeserializeObject<List<AliExpressItem>>(output);
    }
    finally
    {
        // Clean up even when writing or deserializing throws, so the next run starts clean.
        File.Delete(outputFile);
    }

    // Assert
    var firstItem = aliList.First();
    Assert.That(firstItem.Code, Is.EqualTo("32217250589"));
    Assert.That(firstItem.Name, Is.EqualTo("Luxo Caso De Couro Real Para LG Optimus G3 D850 D855 Telefone Tampa Traseira Fique Livro Estilo Com Suporte de Cartão"));
    Assert.That(firstItem.Price, Is.EqualTo("US $3.78"));
    Assert.That(parser.NextPage, Is.Null);
}
// Parses the document already held in the dom field, round-trips the parser output
// through a JSON scratch file (outputFile) and checks the first deserialized item.
public void parseDocument_validDocument_parseOk()
{
    // Prepare
    parser = new AliExpressParser(dom);
    List<AliExpressItem> aliList;

    // Act
    parser.ParseDocument();
    try
    {
        // FileMode.Create overwrites a stale scratch file left by an aborted run
        // instead of failing the way FileMode.CreateNew would.
        using (var stream = new FileStream(outputFile, FileMode.Create))
        {
            parser.WriteOutput(stream);
        }

        string output = File.ReadAllText(outputFile);
        aliList = JsonConvert.DeserializeObject<List<AliExpressItem>>(output);
    }
    finally
    {
        // Clean up even when writing or deserializing throws, so the next run starts clean.
        File.Delete(outputFile);
    }

    // Assert
    var firstItem = aliList.First();
    Assert.That(firstItem.Code, Is.EqualTo("32710766482"));
    Assert.That(firstItem.Name, Is.EqualTo("WolfRule Handbag Cover Flip PU Leather Silicone Wallet Phone Case Pro Case For Doogee"));
    Assert.That(firstItem.Price, Is.EqualTo("US $2.64"));
}
// Console entry point. Asks for a start URL and a page count, then parses up to that
// many consecutive pages, writing each one to
// "<OutputPath>/<category>_page_<n>.json". Stops early when a page reports no next page.
static void Main(string[] args)
{
    string outputPath = ConfigurationManager.AppSettings.Get("OutputPath");

    Console.WriteLine("Digit the AliExpress URL that you want to scrap data:");
    var url = Console.ReadLine();
    var scraper = new WebScraper(url);

    int num_pages = 0;
    do
    {
        Console.WriteLine("How many pages do you want to scrap?");
        string input = Console.ReadLine();
        if (input == null)
        {
            // Standard input was closed: no answer can ever arrive, so stop
            // instead of prompting forever.
            return;
        }

        // TryParse avoids using exceptions for ordinary bad input; counts below 1
        // are rejected because at least one page has to be requested.
        if (!Int32.TryParse(input, out num_pages) || num_pages < 1)
        {
            num_pages = 0;
            Console.WriteLine("Invalid input value. Try to use a valid number.");
        }
    } while (num_pages == 0);

    var parser = new AliExpressParser(scraper.LoadPage());
    int page_counter = 1;
    while (true)
    {
        parser.ParseDocument();

        var outputFile = Path.Combine(outputPath, $"{parser.Category.ToLower().Replace(" ", String.Empty)}_page_{page_counter}.json");

        // FileMode.Create replaces an existing file, so no separate Exists/Delete is needed.
        using (var stream = new FileStream(outputFile, FileMode.Create))
        {
            Console.WriteLine($"Saving page {page_counter} into {outputFile}");
            parser.WriteOutput(stream);
        }

        // Stop once the requested number of pages is saved, or when there is no
        // further page. Without the second check the same page would be parsed
        // and saved forever, because the counter only advances with a next page.
        if (page_counter >= num_pages || String.IsNullOrEmpty(parser.NextPage))
        {
            break;
        }

        scraper = new WebScraper(parser.NextPage);
        parser = new AliExpressParser(scraper.LoadPage());
        page_counter++;
    }
}
// Downloads and parses consecutive pages starting from the URL entered on the form,
// writing each page to "<output dir>/<category>_page_<n>.json" on a background task.
// The page limit is 10, 50 or 100 depending on which radio button is checked.
// Stops early when a page reports no next page. Always returns 1 on completion.
private async Task<int> Download(AliExpressScraperForm form)
{
    // Snapshot the form values before offloading, so the worker never touches the controls.
    // NOTE(review): presumably this method is called from the UI thread — confirm at the call site.
    string startUrl = form.AliURLText.Text;
    string outputDir = form.OutpurDirText.Text;
    int num_pages = form.PagesRadio10.Checked ? 10 : (form.PagesRadio50.Checked ? 50 : 100);

    return await Task.Run(() =>
    {
        var scraper = new WebScraper(startUrl);
        var parser = new AliExpressParser(scraper.LoadPage());
        int page_counter = 1;

        while (true)
        {
            parser.ParseDocument();

            var outputFile = Path.Combine(outputDir, $"{parser.Category.ToLower().Replace(" ", String.Empty)}_page_{page_counter}.json");

            // FileMode.Create replaces an existing file, so no separate Exists/Delete is needed.
            using (var stream = new FileStream(outputFile, FileMode.Create))
            {
                LogText($"Saving page {page_counter} into {outputFile}\n");
                parser.WriteOutput(stream);
            }

            // Stop at the page limit, or when there is no further page. Otherwise the
            // same document would be re-parsed and saved again under higher page numbers.
            if (page_counter >= num_pages || String.IsNullOrEmpty(parser.NextPage))
            {
                break;
            }

            scraper = new WebScraper(parser.NextPage);
            parser = new AliExpressParser(scraper.LoadPage());
            page_counter++;
        }

        return 1;
    });
}