/// <summary>
/// Groups the categories by project and, for each project, collects the
/// contributors that appear in more than one category (phase) of that project.
/// </summary>
/// <returns>
/// A dictionary keyed by project whose value is the list of people that
/// contributed to more than one phase of that project.
/// </returns>
private Dictionary<string, List<IPeople>> GroupCategories()
{
    // Group categories by project
    ReportProgress(0, "Grouping categories by projects");
    var groupedCategories = this.categories.GroupBy(x => x.Project).ToList();

    // Find people with more than 1 phase in each project
    var projects = new Dictionary<string, List<IPeople>>();
    var count = 0;
    foreach (var project in groupedCategories)
    {
        ReportProgress(count, groupedCategories.Count, string.Format("Finding multiple phases people in {0}. Completed: {1}/{2}", project.Key, count, groupedCategories.Count), 50);

        var projectPeople = new List<IPeople>();

        // People in their phases (one distinct contributor list per category)
        var phasesPeople = project.Select(x => x.Contributions.Select(y => y.Contributor).Distinct().ToList()).ToList();

        // All distinct people in the current project (from all phases)
        var projectsDistinctPeople = phasesPeople.SelectMany(x => x).Distinct().ToList();

        // Keep only the people that show up in more than one phase
        var multiplePhasesPeople = projectsDistinctPeople.Where(x => phasesPeople.Count(y => y.Any(z => z == x)) > 1).ToList();

        foreach (var contributor in multiplePhasesPeople)
        {
            var person = new People { Name = contributor };
            person.AddProject(project.Key);
            projectPeople.Add(person);
        }

        projects[project.Key] = projectPeople;

        // Advance the completed counter; without this every report showed 0 completed
        count++;
    }

    return projects;
}
/// <summary>
/// Builds, for every project, the distinct list of skills (domains) held by the
/// people who contributed to it. A person's skills are taken from the specialist
/// entry with the same name and URL; people without such an entry add no skills.
/// </summary>
/// <returns>A dictionary keyed by project whose value is the distinct skill list.</returns>
public Dictionary<string, List<string>> CountDomains()
{
    var items = this.people
        .SelectMany(x =>
        {
            // The specialist lookup depends only on the person, so do it once per
            // person instead of once per contribution.
            var person = new People { Name = x.Name, URL = x.URL };
            var expert = this.specialists.FirstOrDefault(z => z.Name == x.Name && z.URL == x.URL);
            if (expert != null)
            {
                foreach (var domain in expert.Skills.ToList())
                {
                    person.AddSkill(domain);
                }
            }

            return x.Contributions.Select(y => new { Person = person, Project = y.Project });
        })
        .GroupBy(x => x.Project)
        .Select(x => new
        {
            Project = x.Key,
            // All of each person's domains will be added
            Domains = x.SelectMany(y => y.Person.Skills).Distinct().ToList()
        })
        .ToDictionary(x => x.Project, x => x.Domains);

    return items;
}
/// <summary>
/// Pages through the users-by-skill endpoint for <paramref name="skill"/> and
/// records the skill on every returned user, adding users that are not yet in
/// <paramref name="people"/> (matched by name and URL).
/// </summary>
/// <param name="skill">The skill to query; inserted into the request URL as-is.</param>
/// <param name="index">Position of this skill in the overall run, used for progress reporting.</param>
/// <param name="totalCount">Total number of skills in the overall run, used for progress reporting.</param>
/// <param name="people">The list that is searched and extended with the scraped users.</param>
private void GetPeople(string skill, int index, int totalCount, ref List<IPeople> people)
{
    var baseUrl = "https://www.quirky.com/api/v1/users/by_skill?skill={0}&paginated_options%5Busers%5D%5Bpage%5D={1}&paginated_options%5Busers%5D%5Bper_page%5D=20&paginated_options%5Busers%5D%5Border_column%5D=id&paginated_options%5Busers%5D%5Border%5D=asc";
    var hasMore = true;
    var page = 1;
    var scrapedCount = 0;

    while (hasMore)
    {
        // NOTE(review): skill is not URL-escaped here; confirm whether callers pass
        // an already-encoded value before adding escaping.
        var url = string.Format(baseUrl, skill, page++);
        var json = Helper.GetXHRJson(url);
        if (json == null)
        {
            // The helper can yield no response; stop paging rather than
            // dereferencing a null document.
            break;
        }

        var jsonObj = json.FromJson<JObject>();
        var stats = jsonObj["paginated_meta"]["users"];
        hasMore = stats.Value<bool>("has_next_page");
        var total = stats.Value<int>("total"); // The number of people with this skill

        // Get the users on this page
        var users = jsonObj["data"].Value<JArray>("users");
        scrapedCount += users.Count;
        foreach (var user in users)
        {
            var personName = user.Value<string>("name");
            var personUrl = string.Format(PeopleScraper.USER_URL_FORMAT, user.Value<string>("id"));

            var person = people.FirstOrDefault(x => x.Name == personName && x.URL == personUrl);
            if (person == null)
            {
                person = new People { Name = personName, URL = personUrl };
                people.Add(person);
            }

            person.AddSkill(skill);
        }

        ReportProgress(index, totalCount, string.Format("Scraping people with {0} skill... Scraped: {1}/{2}. Progress: {3}/{4}", skill, scrapedCount, total, index, totalCount));
    }

    ReportProgress(index + 1, totalCount, string.Format("Completed scraping people with {0} skill. Scraped: {1}. Progress: {2}/{3}", skill, scrapedCount, index + 1, totalCount));
}
/// <summary>
/// Creates a project from its Amazon detail, its inventor and its categories.
/// </summary>
/// <param name="amazonDetail">Value stored in <c>AmazonDetail</c>.</param>
/// <param name="inventor">Value stored in <c>Inventor</c>.</param>
/// <param name="categories">Value stored in <c>Categories</c>; kept as passed, not copied.</param>
public Project(AmazonDetail amazonDetail, People inventor, IEnumerable<Category> categories)
{
    this.AmazonDetail = amazonDetail;
    this.Inventor = inventor;
    this.Categories = categories;
}
/// <summary>
/// Pages through a user's followers or followings and adds each returned user
/// to <paramref name="person"/> as a follower or a following.
/// </summary>
/// <param name="index">Position of this person in the overall run, used for progress reporting.</param>
/// <param name="totalCount">Total number of people in the overall run, used for progress reporting.</param>
/// <param name="person">The person that receives the scraped followers/followings.</param>
/// <param name="isFollower">True to scrape followers, false to scrape followings.</param>
/// <param name="personId">The id inserted into the profile request URL.</param>
/// <param name="count">Expected number of followers/followings; only used in progress messages.</param>
private void Populate(int index, int totalCount, ref IPeople person, bool isFollower, string personId, int count)
{
    var reportText = isFollower ? "followers" : "followings";
    var urlpage = isFollower ? "followers" : "following";
    var urlBase = "https://www.quirky.com/api/v1/user_profile/{0}/{1}?paginated_options%5Bfollows%5D%5Buse_cursor%5D=true&paginated_options%5Bfollows%5D%5Bper_page%5D=20&paginated_options%5Bfollows%5D%5Border_column%5D=created_at&paginated_options%5Bfollows%5D%5Border%5D=desc";
    var baseUrl = string.Format(urlBase, personId, urlpage);
    var urlCursorAddition = "&paginated_options%5Bfollows%5D%5Bcursor%5D={0}";

    var hasMore = true;
    string cursor = null;
    var scrapedCount = 0;

    while (hasMore)
    {
        // The first request has no pagination cursor; every later request has one,
        // because the loop stops as soon as no cursor can be derived.
        var url = cursor == null ? baseUrl : baseUrl + string.Format(urlCursorAddition, cursor);

        var json = Helper.GetXHRJson(url);
        if (json == null)
        {
            // No response for this page; stop paging instead of failing on null.
            break;
        }

        var jsonObj = JsonConvert.DeserializeObject(json) as JObject;
        hasMore = jsonObj["paginated_meta"]["follows"].Value<bool>("has_next_page");

        var arr = jsonObj["data"].Value<JArray>("follows");
        scrapedCount += arr.Count;

        if (hasMore)
        {
            if (arr.Count == 0)
            {
                // An empty page has no last entry to derive the next cursor from.
                hasMore = false;
            }
            else
            {
                cursor = arr.Last.Value<string>("created_at");
                cursor = Helper.EncodeQuirkyDate(cursor);
                if (cursor == null)
                {
                    hasMore = false;
                }
            }
        }

        // Get the users on this page (followers or followings, depending on isFollower)
        var users = jsonObj["data"].Value<JArray>("users");
        foreach (var user in users)
        {
            var personName = user.Value<string>("name");
            var personUrl = string.Format(PeopleScraper.USER_URL_FORMAT, user.Value<string>("id"));
            var fellow = new People { Name = personName, URL = personUrl };

            if (isFollower)
            {
                person.AddFollower(fellow);
            }
            else
            {
                person.AddFollowing(fellow);
            }
        }

        ReportProgress(index, totalCount, string.Format("Scraping {0}'s {1}... Scraped: {2}/{3} {1}. Progress: {4}/{5}", person.Name, reportText, scrapedCount, count, index, totalCount));
    }

    ReportProgress(index + 1, totalCount, string.Format("Completed scraping {0}'s {1}. Scraped: {2}/{3} {1}. Progress: {4}/{5}", person.Name, reportText, scrapedCount, count, index + 1, totalCount));
}
/// <summary>
/// Scrapes the contributors of every category (ordered by project) and records,
/// for each contributor, the categories they contributed to. Contributors are
/// matched by name and URL so each person appears once in the result.
/// </summary>
/// <returns>The list of scraped contributors.</returns>
public IEnumerable<object> Scrape()
{
    var contributors = new List<IPeople>();
    var totalCount = categories.Count;
    var progress = 0;
    ReportProgress(progress, "Starting people scraping...");

    this.categories = this.categories.OrderBy(x => x.Project).ToList();
    for (var i = 0; i < this.categories.Count; i++)
    {
        var category = categories[i];
        ReportProgress(progress, totalCount, string.Format("Scraping category: {0} ({1})... Contributions: {2} Progress: {3}/{4}", category.Name, category.Project, category.ContributionNum, i, this.categories.Count));

        if (category.ContributionNum == 0)
        {
            // Nothing to do here, but the category still counts as handled so
            // that progress can reach totalCount.
            progress++;
            continue;
        }

        var addCategory = new Category { Name = category.Name, Project = category.Project, URL = category.URL };

        var contributionsString = "https://www.quirky.com/api/v1/inventions/{0}/with_build_interface_objects?with_random_contributions=true";
        var projectId = Regex.Match(category.URL, "(?<=/invent/)[0-9]+(?=/)");
        var json = Helper.GetXHRJson(string.Format(contributionsString, projectId));
        if (json == null)
        {
            progress++;
            continue;
        }

        var jsonObj = JsonConvert.DeserializeObject(json) as JObject;
        var scrapeCount = 0;

        var catDetails = jsonObj["data"]["projects"].FirstOrDefault(x => x.Value<string>("human_name") == category.Name);
        if (catDetails != null)
        {
            var catId = catDetails.Value<long>("id");
            var contributionString = "https://www.quirky.com/api/v1/contributions/for_project?parent_id={0}&parent_class=Project&paginated_options%5Bcontributions%5D%5Buse_cursor%5D=true&paginated_options%5Bcontributions%5D%5Bper_page%5D=20&paginated_options%5Bcontributions%5D%5Border_column%5D=created_at&paginated_options%5Bcontributions%5D%5Border%5D=desc";
            var baseUrl = string.Format(contributionString, catId);
            var additional = "&paginated_options%5Bcontributions%5D%5Bcursor%5D={0}";

            json = Helper.GetXHRJson(baseUrl);
            var hasMore = true;

            // A null response ends paging, mirroring the null check on the first request.
            while (hasMore && json != null)
            {
                jsonObj = JsonConvert.DeserializeObject(json) as JObject;
                hasMore = jsonObj["paginated_meta"]["contributions"].Value<bool>("has_next_page");

                var arr = jsonObj["data"].Value<JArray>("contributions");
                scrapeCount += arr.Count;

                string url = null;
                if (arr.Count == 0)
                {
                    // Last() throws on an empty page and there is no entry to
                    // derive the next cursor from, so stop paging.
                    hasMore = false;
                }
                else
                {
                    var cursor = arr.Last().Value<string>("created_at");
                    cursor = Helper.EncodeQuirkyDate(cursor);
                    if (cursor == null)
                    {
                        hasMore = false;
                    }
                    else
                    {
                        url = baseUrl + string.Format(additional, cursor);
                    }
                }

                // Get contributors
                var users = jsonObj["data"].Value<JArray>("users");
                foreach (var user in users)
                {
                    var personName = user.Value<string>("name");
                    var personUrl = string.Format(PeopleScraper.USER_URL_FORMAT, user.Value<string>("id"));

                    var person = contributors.FirstOrDefault(x => x.Name == personName && x.URL == personUrl);
                    if (person == null)
                    {
                        person = new People { Name = personName, URL = personUrl };
                        contributors.Add(person);
                    }

                    person.AddContribution(addCategory);
                }

                ReportProgress(progress, totalCount, string.Format("Scraping category: {0} ({1})... Scraped: {2}/{3} Progress: {4}/{5}", category.Name, category.Project, scrapeCount, category.ContributionNum, i, this.categories.Count));

                if (hasMore)
                {
                    json = Helper.GetXHRJson(url);
                }
            }
        }

        // Report i + 1 so the text matches the incremented progress value.
        ReportProgress(++progress, totalCount, string.Format("Completed scraping category: {0} ({1}). Scraped: {2}/{3} Progress: {4}/{5}", category.Name, category.Project, scrapeCount, category.ContributionNum, i + 1, this.categories.Count));
    }

    MessageBox.Show("People scraping completed...");
    return contributors;
}