Exemple #1
0
 /// <summary>
 /// Creates a new instance.
 /// </summary>
 /// <param name="scrapingService">Value stored in the <c>ScrapingService</c> member.</param>
 /// <param name="blobClient">Value stored in the <c>BlobClient</c> member.</param>
 /// <param name="searchClient">Value stored in the <c>SearchClient</c> member.</param>
 /// <param name="logger">Value stored in the <c>Logger</c> member.</param>
 public SearchService(ScrapingService scrapingService, CloudBlobClient blobClient, SearchServiceClient searchClient, ILogger<SearchService> logger)
 {
     // Plain dependency capture; no validation or other work happens here.
     ScrapingService = scrapingService;
     BlobClient = blobClient;
     SearchClient = searchClient;
     Logger = logger;
 }
        /// <summary>
        /// Scrapes one gallery page: opens <paramref name="url"/>, finds every anchor inside
        /// the element with id <c>gallery</c> and downloads each one whose <c>href</c> starts
        /// with <c>/photo/</c>. Downloads are dispatched through <c>Parallel.ForEach</c>.
        /// </summary>
        /// <param name="scrapingService">Service whose <c>Options</c> are read and forwarded to <c>DownloadPicture</c>.</param>
        /// <param name="url">Address of the gallery page to open.</param>
        /// <param name="userName">User name; replaced by <c>Options.Name</c> when that option is non-empty.</param>
        /// <param name="categoryName">Category name; replaced by <c>Options.Category</c> when that option is non-empty.</param>
        /// <param name="galleryName">Gallery name, used for console output and forwarded to <c>DownloadPicture</c>.</param>
        public override void ScrapeUrl(ScrapingService scrapingService, string url, string userName, string categoryName, string galleryName)
        {
            // Explicit options take precedence over the names passed in by the caller.
            if (!string.IsNullOrEmpty(scrapingService.Options.Name))
            {
                userName = scrapingService.Options.Name;
            }

            if (!string.IsNullOrEmpty(scrapingService.Options.Category))
            {
                categoryName = scrapingService.Options.Category;
            }

            Console.Write($"GALLERY {userName} / {categoryName} / {galleryName} ");
            HtmlDocument htmlDoc = OpenDocument(url);

            // HtmlAgilityPack's SelectNodes yields null (not an empty collection) when the
            // XPath matches nothing; Parallel.ForEach would throw ArgumentNullException on it.
            IEnumerable<HtmlNode> pictures = htmlDoc.DocumentNode.SelectNodes("//div[@id='gallery']//a");

            if (pictures != null)
            {
                Parallel.ForEach(
                    pictures,
                    node =>
                    {
                        string href = node.GetAttributeValue("href", string.Empty);

                        // Ordinal comparison: this is a URL path prefix, not linguistic text.
                        if (href.StartsWith("/photo/", StringComparison.Ordinal))
                        {
                            DownloadPicture(scrapingService.Options, href, userName, categoryName, galleryName);
                        }
                    });
            }

            // Terminates the console line started by Console.Write above.
            Console.WriteLine();
        }
        /// <summary>
        /// Walks every <c>a.blk_galleries</c> link of an already-loaded organizer page and
        /// hands each distinct <c>/gallery/</c> link to <c>scrapingService.ScrapeUrl</c>.
        /// </summary>
        /// <param name="scrapingService">Service used to scrape each resolved gallery URL.</param>
        /// <param name="userName">User name forwarded to the gallery scrape and printed to the console.</param>
        /// <param name="categoryName">Category name forwarded to the gallery scrape and printed to the console.</param>
        /// <param name="htmlDoc">Parsed organizer page to inspect.</param>
        private void ScrapOrganizerPage(ScrapingService scrapingService, string userName, string categoryName, HtmlDocument htmlDoc)
        {
            Console.WriteLine($"ORGANIZER {userName} / {categoryName}");

            // HtmlAgilityPack's SelectNodes yields null when nothing matches; treat that as "no galleries".
            IEnumerable<HtmlNode> galleries = htmlDoc.DocumentNode.SelectNodes("//a[@class='blk_galleries']");

            if (galleries == null)
            {
                return;
            }

            // Set-based de-duplication instead of a linear List.Contains scan per link.
            var processedUrls = new HashSet<string>(StringComparer.Ordinal);

            foreach (var node in galleries)
            {
                // A missing href becomes "" and is rejected by the prefix check below
                // instead of raising a NullReferenceException.
                string href = node.GetAttributeValue("href", string.Empty);
                string innerHtml = node.InnerHtml;

                if (innerHtml.Contains("Overview") ||
                    innerHtml.Contains("[Show All]") ||
                    innerHtml.Contains("[Hide All]") ||
                    !href.StartsWith("/gallery/", StringComparison.Ordinal))
                {
                    continue;
                }

                // Add returns false when the link was already handled.
                if (!processedUrls.Add(href))
                {
                    continue;
                }

                // NOTE(review): href is known to start with "/gallery/" here, so the BASE_URL
                // branch looks unreachable unless BASE_URL itself starts with that prefix, and
                // the fallback produces "BASE_URL//gallery/...". Kept as-is - confirm intent.
                var responseUrl = href.StartsWith(BASE_URL, StringComparison.Ordinal) ?
                                  GetResponseUrl(href) :
                                  GetResponseUrl(BASE_URL + "/" + href);

                // The link text, stripped of tabs and newlines, serves as the gallery name.
                string galleryName = node.InnerText.Replace("\t", string.Empty).Replace("\n", string.Empty);

                scrapingService.ScrapeUrl(responseUrl + "?view=2", userName, categoryName, galleryName);
            }
        }
Exemple #4
0
        /// <summary>
        /// Scrapes a user's galleries overview page. The user name is taken from the URL
        /// segment between <c>profile/</c> and <c>/galleries</c>; every <c>a.blk_galleries</c>
        /// link that is not an "Overview" / "[Show All]" / "[Hide All]" entry is resolved and
        /// passed to <c>scrapingService.ScrapeUrl</c> with the link text as category name.
        /// </summary>
        /// <param name="scrapingService">Service used to scrape each resolved link.</param>
        /// <param name="url">Profile galleries URL; must contain <c>profile/</c> followed by <c>/galleries</c>.</param>
        /// <param name="userName">Ignored on input; overwritten with the name extracted from <paramref name="url"/>.</param>
        /// <param name="categoryName">Not used by this implementation.</param>
        /// <param name="galleryName">Not used by this implementation.</param>
        /// <exception cref="ArgumentException"><paramref name="url"/> does not have the expected shape.</exception>
        public override void ScrapeUrl(ScrapingService scrapingService, string url, string userName, string categoryName, string galleryName)
        {
            const string profileMarker   = "profile/";
            const string galleriesMarker = "/galleries";

            // Validate both markers up front: an unchecked IndexOf of -1 would otherwise yield
            // a wrong start offset or a negative length passed to Substring.
            int markerIndex = url.IndexOf(profileMarker, StringComparison.Ordinal);

            if (markerIndex < 0)
            {
                throw new ArgumentException($"URL does not contain '{profileMarker}': {url}", nameof(url));
            }

            int startIndex = markerIndex + profileMarker.Length;
            int endIndex   = url.IndexOf(galleriesMarker, startIndex, StringComparison.Ordinal);

            if (endIndex < 0)
            {
                throw new ArgumentException($"URL does not contain '{galleriesMarker}' after '{profileMarker}': {url}", nameof(url));
            }

            userName = url.Substring(startIndex, endIndex - startIndex);

            Console.WriteLine($"USER {userName}");

            HtmlDocument htmlDoc = OpenDocument(url);

            // HtmlAgilityPack's SelectNodes yields null when nothing matches; nothing to do then.
            IEnumerable<HtmlNode> galleries = htmlDoc.DocumentNode.SelectNodes("//a[@class='blk_galleries']");

            if (galleries == null)
            {
                return;
            }

            foreach (var node in galleries)
            {
                string innerHtml = node.InnerHtml;

                if (innerHtml.Contains("<b>Overview</b>") ||
                    innerHtml.Contains("[Show All]") ||
                    innerHtml.Contains("[Hide All]"))
                {
                    continue;
                }

                // Skip anchors without an href instead of failing with a NullReferenceException.
                string href = node.GetAttributeValue("href", string.Empty);

                if (href.Length == 0)
                {
                    continue;
                }

                var responseUrl = GetResponseUrl(href);

                scrapingService.ScrapeUrl(responseUrl, userName, node.InnerText, null);
            }
        }
Exemple #5
0
        /// <summary>
        /// Fetches the price for ticker "7203.T" through <c>GetFinancePriceAsync</c>, prints it
        /// and asserts that it is greater than 4000. The asynchronous work is run via
        /// <c>Task.Run</c> and waited on synchronously.
        /// </summary>
        public void TestMethod6()
        {
            // stock by Toyota
            const string toyotaTicker = "7203.T";

            Func<Task> checkPrice = async () =>
            {
                int price = await ScrapingService.GetFinancePriceAsync(toyotaTicker);

                Console.WriteLine($"stock price : {price}");
                Assert.IsTrue(price > 4000);
            };

            // GetAwaiter().GetResult() surfaces a failure from the lambda as the original exception.
            Task.Run(checkPrice).GetAwaiter().GetResult();
        }
        /// <summary>
        /// Returns the results that <c>GetResultRepositoryAsync</c> produces for the endpoint
        /// <c>{user}/{repo}</c>, materialized as a list.
        /// </summary>
        /// <param name="user">First endpoint segment, bound from the query string.</param>
        /// <param name="repo">Second endpoint segment, bound from the query string.</param>
        /// <param name="scrapingService">Scraping service resolved from the service container.</param>
        /// <returns>The result list, or a bad-request result when any exception is thrown.</returns>
        public async Task<ActionResult<IEnumerable<ViewModel>>> GetAsync(
            [FromQuery] string user,
            [FromQuery] string repo,
            [FromServices] ScrapingService scrapingService)
        {
            try
            {
                var results = await scrapingService.GetResultRepositoryAsync($"{user}/{repo}");

                return results.ToList();
            }
            catch (Exception)
            {
                // Every failure is reported to the client with the same generic message.
                return BadRequest("It was not possible to obtain results for this repository");
            }
        }
        /// <summary>
        /// Scrapes an organizer URL. When <c>ExtractPages</c> finds no sub-pages the document
        /// opened from <paramref name="url"/> is scraped directly; otherwise each sub-page
        /// (<paramref name="url"/> + page suffix) is opened and scraped in turn.
        /// </summary>
        /// <param name="scrapingService">Service forwarded to <c>ScrapOrganizerPage</c>.</param>
        /// <param name="url">Organizer URL to open.</param>
        /// <param name="userName">User name forwarded to <c>ScrapOrganizerPage</c>.</param>
        /// <param name="categoryName">Category name forwarded to <c>ScrapOrganizerPage</c>.</param>
        /// <param name="galleryName">Not used by this implementation.</param>
        public override void ScrapeUrl(ScrapingService scrapingService, string url, string userName, string categoryName, string galleryName)
        {
            HtmlDocument firstPage = OpenDocument(url);
            var pages = ExtractPages(firstPage);

            if (pages.Count != 0)
            {
                // Paged organizer: every listed page is fetched separately.
                foreach (string containedUrl in pages)
                {
                    HtmlDocument page = OpenDocument(url + containedUrl);

                    ScrapOrganizerPage(scrapingService, userName, categoryName, page);
                }

                return;
            }

            // No paging: the document that was just opened is the only page.
            ScrapOrganizerPage(scrapingService, userName, categoryName, firstPage);
        }
Exemple #8
0
        /// <summary>
        /// Entry point: loads four teams, their item lists and the member list from JSON files,
        /// refreshes the members via <c>GetLadder</c> and the items via <c>GetItems</c>,
        /// recalculates each team's level/delve/total points and writes the data back to the
        /// same JSON files.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static async Task Main(string[] args)
        {
            //CsvReaderService _csvReader = new CsvReaderService();
            //_csvReader.ReadTeamFile();
            // NOTE(review): _ladder and teams are created but never read in this method.
            LadderService   _ladder         = new LadderService();
            List <Team>     teams           = new List <Team>();
            ScrapingService _scrapper       = new ScrapingService();
            // Load the persisted state of each team, its items, and all members.
            Team            theFormed       = JsonSerializer.Deserialize <Team>(File.ReadAllText(filePathBase + filePathTheFormed));
            Team            theTwisted      = JsonSerializer.Deserialize <Team>(File.ReadAllText(filePathBase + filePathTheTwisted));
            Team            theFeared       = JsonSerializer.Deserialize <Team>(File.ReadAllText(filePathBase + filePathTheFeared));
            Team            theHidden       = JsonSerializer.Deserialize <Team>(File.ReadAllText(filePathBase + filePathTheHidden));
            List <Item>     theFormedItems  = JsonSerializer.Deserialize <List <Item> >(File.ReadAllText(filePathBase + filePathTheFormedItems));
            List <Item>     theTwistedItems = JsonSerializer.Deserialize <List <Item> >(File.ReadAllText(filePathBase + filePathTheTwistedItems));
            List <Item>     theFearedItems  = JsonSerializer.Deserialize <List <Item> >(File.ReadAllText(filePathBase + filePathTheFearedItems));
            // Despite its name, theHiddenTeams holds the item list read from filePathTheHiddenItems.
            List <Item>     theHiddenTeams  = JsonSerializer.Deserialize <List <Item> >(File.ReadAllText(filePathBase + filePathTheHiddenItems));
            List <Member>   members         = JsonSerializer.Deserialize <List <Member> >(File.ReadAllText(filePathBase + "\\BPL3Members.json"));


            // Refresh the member list and persist it immediately, before any item scraping.
            List <Member> updateMember = await GetLadder(members, new List <Team> {
                theFormed, theTwisted, theFeared, theHidden
            });

            SerializeJsonService.SerializeJson(updateMember, filePathBase + "\\BPL3Members.json");
            // Refresh each team's items from its stash URL; the four calls are awaited one after another.
            TeamItem updateFormed = await _scrapper.GetItems(theFormed.StashUrl, new TeamItem { Team = theFormed, Items = theFormedItems });

            TeamItem updateFearedItems = await _scrapper.GetItems(theFeared.StashUrl, new TeamItem { Team = theFeared, Items = theFearedItems });

            TeamItem updateTwistedItems = await _scrapper.GetItems(theTwisted.StashUrl, new TeamItem { Team = theTwisted, Items = theTwistedItems });

            TeamItem updateHiddenItems = await _scrapper.GetItems(theHidden.StashUrl, new TeamItem { Team = theHidden, Items = theHiddenTeams });

            // NOTE(review): the per-team lists are filtered from `members`, not from `updateMember`;
            // this only reflects refreshed data if GetLadder updates `members` in place - TODO confirm.
            List <Member> theTwistedMembers = members.Where(m => m.TeamName == "The Twisted").ToList();
            List <Member> theFearedMembers  = members.Where(m => m.TeamName == "The Feared").ToList();
            List <Member> theHiddenMembers  = members.Where(m => m.TeamName == "The Hidden").ToList();
            List <Member> theFormedMembers  = members.Where(m => m.TeamName == "The Formed").ToList();

            List <int> points = new List <int>();

            // Order matters: Twisted, Feared, Hidden, Formed - the fixed indices below depend on it.
            // Assumes each CalcPoints call contributes exactly two entries (level, then delve) - TODO confirm.
            points.AddRange(CalcPoints(theTwistedMembers));
            points.AddRange(CalcPoints(theFearedMembers));
            points.AddRange(CalcPoints(theHiddenMembers));
            points.AddRange(CalcPoints(theFormedMembers));
            theTwisted.LevelPoints = points[0];
            theTwisted.DelvePoints = points[1];
            theTwisted.TotalPoints = theTwisted.LevelPoints + theTwisted.DelvePoints + theTwisted.SetPoints;
            theFeared.LevelPoints  = points[2];
            theFeared.DelvePoints  = points[3];
            theFeared.TotalPoints  = theFeared.LevelPoints + theFeared.DelvePoints + theFeared.SetPoints;
            theHidden.LevelPoints  = points[4];
            theHidden.DelvePoints  = points[5];
            theHidden.TotalPoints  = theHidden.LevelPoints + theHidden.DelvePoints + theHidden.SetPoints;
            theFormed.LevelPoints  = points[6];
            theFormed.DelvePoints  = points[7];
            theFormed.TotalPoints  = theFormed.LevelPoints + theFormed.DelvePoints + theFormed.SetPoints;

            // Persist the teams and items returned by GetItems.
            // NOTE(review): the points above were written to theFormed/theTwisted/...; they are only
            // saved here if the returned TeamItem.Team is that same instance - TODO confirm.
            SerializeJsonService.SerializeJson(updateFormed.Team, filePathBase + filePathTheFormed);
            SerializeJsonService.SerializeJson(updateFormed.Items, filePathBase + filePathTheFormedItems);
            SerializeJsonService.SerializeJson(updateHiddenItems.Team, filePathBase + filePathTheHidden);
            SerializeJsonService.SerializeJson(updateHiddenItems.Items, filePathBase + filePathTheHiddenItems);
            SerializeJsonService.SerializeJson(updateTwistedItems.Team, filePathBase + filePathTheTwisted);
            SerializeJsonService.SerializeJson(updateTwistedItems.Items, filePathBase + filePathTheTwistedItems);
            SerializeJsonService.SerializeJson(updateFearedItems.Team, filePathBase + filePathTheFeared);
            SerializeJsonService.SerializeJson(updateFearedItems.Items, filePathBase + filePathTheFearedItems);
        }
        /// <summary>
        /// Entry point: creates a <c>ScrapingService</c> and runs its <c>Scrape</c> method.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            new ScrapingService().Scrape();
        }
        /// <summary>
        /// Entry point of the sample: adds three URLs to an agent, starts a scraping job,
        /// polls the job status every two seconds until it is "completed", "stopped" or
        /// "aborted", then downloads the result and writes it to a local file.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            string agentId       = "qgddevdqzm";
            var    agentyService = new ScrapingService();

            var urls = new List<string>
            {
                "http://books.toscrape.com/catalogue/set-me-free_988/index.html",
                "http://books.toscrape.com/catalogue/starving-hearts-triangular-trade-trilogy-1_990/index.html",
                "http://books.toscrape.com/catalogue/the-black-maria_991/index.html"
            };

            Console.WriteLine("************Starting Agenty Service************");

            // Step 1: register the URLs with the agent.
            Console.WriteLine("****Add URLs to Agent****");
            var addUrlsResponse = agentyService.AddUrlsToAgent(agentId, urls);

            Console.WriteLine($"StatusCode : {addUrlsResponse.status_code}");
            Console.WriteLine($"Message : {addUrlsResponse.message}");

            // Step 2: start the scraping job.
            Console.WriteLine("****Start the Scraping Job****");
            var startResponse = agentyService.StartScrapingAgent(agentId);

            Console.WriteLine($"StatusCode : {startResponse.status_code}");
            Console.WriteLine($"Message : {startResponse.message}");
            Console.WriteLine($"Job Id : {startResponse.job_id}");

            // Step 3: poll until the job reports one of the terminal statuses.
            Console.WriteLine("****Check job status in loop****");

            string[] terminalStatuses = { "completed", "stopped", "aborted" };

            while (true)
            {
                var    statusResponse = agentyService.GetJobStatus(startResponse.job_id);
                string jobStatus      = statusResponse.status;

                Console.WriteLine($"Job status : {statusResponse.status} - Pages processed: ({statusResponse.pages_processed}/{statusResponse.pages_total})");

                if (Array.IndexOf(terminalStatuses, jobStatus) >= 0)
                {
                    break;
                }

                Console.WriteLine($"Rechecking status after 2 second...");
                Thread.Sleep(2000);
            }

            // Step 4: download the result and save it locally.
            Console.WriteLine("****Download scraping job result****");
            var resultResponse = agentyService.GetJobResult(startResponse.job_id);

            Console.WriteLine($"Total rows in result : {resultResponse.total}");

            string localPath = @"Y:\scraping\result.txt";

            // Round-trip through JSON to turn the result payload into a DataTable.
            string    json  = JsonConvert.SerializeObject(resultResponse.result);
            DataTable table = JsonConvert.DeserializeObject<DataTable>(json);

            WriteToFile(table, localPath);

            Console.WriteLine($"Result download at: {localPath}");

            // Keeps the console window open until a key is pressed.
            Console.ReadKey();
        }
Exemple #11
0
 /// <summary>
 /// Creates the service: resolves the required <c>ScrapingService</c> from
 /// <paramref name="provider"/> and builds the regular expression used to match a ranking
 /// table row, with named groups <c>name</c>, <c>job</c> and <c>level</c>.
 /// </summary>
 /// <param name="provider">Service provider; also passed to the base constructor.</param>
 public RankingService(IServiceProvider provider) : base(provider)
 {
     // One ranking row: an optional rank icon, the character name, an optional emblem plus
     // short text, then the job cell and the level cell.
     const string maplerRowPattern = @"<td class=""align-middle"">(<img src=""/static/images/rank/(guild_master|islander)\.png"">(</img>)?(<br>|<br/>))?<b>(?<name>[A-Za-z0-9]{4,12})</b>(<br>|<br/>)\s*(<img src=""/static/images/rank/emblem/\d{8}\.\d{1,2}\s*\.png"">\s*)?\w{0,12}</td>\s*<!--job-->\s*<td class=""align-middle""><img src=""/static/images/rank/[a-zA-Z]{5,8}\.png"">(</img>)?(<br>|<br/>)(?<job>[\w()/\s]{4,25})</td>\s*<!--level & exp -->\s*<td class=""align-middle""><b>(?<level>\d{1,3})</b>(<br>|<br/>)";

     _scraper = provider.GetRequiredService<ScrapingService>();
     _maplerRegex = new Regex(maplerRowPattern);
 }
Exemple #12
0
 /// <summary>
 /// Creates the controller and stores its injected dependencies.
 /// </summary>
 /// <param name="log">Logger stored in <c>_log</c>. NOTE(review): its category type is <c>ScrapingService</c>, not this controller - confirm this is intended.</param>
 /// <param name="scrapingService">Scraping service stored in <c>_scrapingService</c>.</param>
 public ScoreBoardController(ILogger<ScrapingService> log, ScrapingService scrapingService)
 {
     _scrapingService = scrapingService;
     _log = log;
 }
Exemple #13
0
        /// <summary>
        /// Test fixture setup: creates a mock <c>ITvMazeHttpClient</c> and a
        /// <c>ScrapingService</c> under test that is constructed with the mocked client.
        /// </summary>
        public ScrapingServiceTests()
        {
            // The mock is kept in a field so it stays reachable outside this constructor.
            _httpClient = new Mock<ITvMazeHttpClient>();
            _target     = new ScrapingService(_httpClient.Object);
        }
 /// <summary>
 /// Base implementation of the scrape operation; it intentionally does nothing.
 /// The method is <c>virtual</c> so derived types can override it.
 /// </summary>
 /// <param name="scrapingService">Scraping service supplied by the caller; unused here.</param>
 /// <param name="url">URL to scrape; unused here.</param>
 /// <param name="userName">User name; unused here.</param>
 /// <param name="categoryName">Category name; unused here.</param>
 /// <param name="galleryName">Gallery name; unused here.</param>
 public virtual void ScrapeUrl(ScrapingService scrapingService, string url, string userName, string categoryName, string galleryName)
 {
 }
 /// <summary>
 /// Creates the controller, keeping the supplied database context and instantiating its
 /// own <c>ScrapingService</c>.
 /// </summary>
 /// <param name="db">Database context stored in <c>_db</c>.</param>
 public ArpenspController(MPSPDbContext db)
 {
     // The scraping service is created here rather than received as a parameter.
     scrap = new ScrapingService();
     _db = db;
 }
Exemple #16
0
 /// <summary>
 /// Creates the controller, keeping the supplied database context and instantiating its
 /// own <c>ScrapingService</c>.
 /// </summary>
 /// <param name="db">Database context stored in <c>_db</c>.</param>
 public CagedController(MPSPDbContext db)
 {
     // The scraping service is created here rather than received as a parameter.
     scrap = new ScrapingService();
     _db = db;
 }
 /// <summary>
 /// Creates the controller, keeping the supplied database context and instantiating a
 /// <c>ScrapingService</c> from the supplied configuration.
 /// </summary>
 /// <param name="db">Database context stored in <c>_db</c>.</param>
 /// <param name="conf">Configuration passed to the <c>ScrapingService</c> constructor.</param>
 public InfocrimController(MPSPDbContext db, IConfiguration conf)
 {
     // The scraping service is created here, from the configuration, rather than injected.
     scrap = new ScrapingService(conf);
     _db = db;
 }
Exemple #18
0
 /// <summary>
 /// Creates the instance and keeps the supplied scraping service.
 /// </summary>
 /// <param name="scrapingService">Scraping service stored in <c>_scrapingService</c>.</param>
 public WebJobMethods(ScrapingService scrapingService) => _scrapingService = scrapingService;