/// <summary>
/// Groups <c>this.categories</c> by project and, for every project, collects the contributors that
/// appear in more than one of that project's categories ("phases").
/// </summary>
/// <returns>
/// A dictionary keyed by project. Each value holds one newly created <c>People</c> per contributor found in
/// at least two phases of that project; only <c>Name</c> and the project itself are set on it.
/// </returns>
private Dictionary<string, List<IPeople>> GroupCategories()
        {
            // Group categories by project
            ReportProgress(0, "Grouping categories by projects");
            var groupedCategories = this.categories.GroupBy(x => x.Project).ToList();

            // Find people with more than 1 phase in each project
            var projects = new Dictionary<string, List<IPeople>>();
            var count = 0;  // Number of projects fully processed so far
            foreach (var project in groupedCategories)
            {
                ReportProgress(count, groupedCategories.Count, string.Format("Finding multiple phases people in {0}. Completed: {1}/{2}", project.Key, count, groupedCategories.Count), 50);

                // Distinct contributors of each phase (one inner list per category of the project)
                var phasesPeople = project.Select(x => x.Contributions.Select(y => y.Contributor).Distinct().ToList()).ToList();

                // All distinct contributors of the current project (from all phases)
                var projectsDistinctPeople = phasesPeople.SelectMany(x => x).Distinct().ToList();

                // Keep only those that occur in at least two phase lists
                var multiplePhasesPeople = projectsDistinctPeople.Where(x => phasesPeople.Count(y => y.Any(z => z == x)) > 1).ToList();

                var projectPeople = new List<IPeople>();
                foreach (var contributor in multiplePhasesPeople)
                {
                    var person = new People { Name = contributor };
                    person.AddProject(project.Key);
                    projectPeople.Add(person);
                }

                projects[project.Key] = projectPeople;

                // Advance the counter; without this every progress report would claim 0 completed projects.
                count++;
            }

            return projects;
        }
        /// <summary>
        /// Builds, per project, the distinct list of skills ("domains") of the people who contributed to it.
        /// </summary>
        /// <remarks>
        /// A person's skills are taken from the entry in <c>this.specialists</c> that has the same
        /// <c>Name</c> and <c>URL</c>; when there is no such entry no skills are copied for that person.
        /// </remarks>
        /// <returns>A dictionary mapping each contributed-to project to its distinct skills.</returns>
        public Dictionary<string, List<string>> CountDomains()
        {
            var items = this.people.SelectMany(x =>
            {
                // The skill lookup depends only on the person, so do it once per person
                // rather than once per contribution.
                var person = new People
                {
                    Name = x.Name,
                    URL = x.URL
                };
                var expert = this.specialists.FirstOrDefault(z => z.Name == x.Name && z.URL == x.URL);
                if (expert != null)
                {
                    foreach (var domain in expert.Skills.ToList())
                    {
                        person.AddSkill(domain);
                    }
                }

                // One (person, project) pair per contribution
                return x.Contributions.Select(y => new
                {
                    Person = person,
                    Project = y.Project
                });
            }).GroupBy(x => x.Project)
            .Select(x => new
            {
                Project = x.Key,
                Domains = x.SelectMany(y => y.Person.Skills).Distinct().ToList()    // All of each persons domain will be added
            })
            .ToDictionary(x => x.Project, x => x.Domains);

            return items;
        }
        /// <summary>
        /// Pages through the users-by-skill endpoint for <paramref name="skill"/> and records the skill on every
        /// returned user, adding users that are not yet in <paramref name="people"/>.
        /// </summary>
        /// <param name="skill">Skill to query. It is URL-escaped before being placed in the query string.</param>
        /// <param name="index">Position of this skill in the caller's run; used only for progress reporting.</param>
        /// <param name="totalCount">Total used as the denominator in progress reporting.</param>
        /// <param name="people">
        /// List that is searched by name and URL and appended to. It is passed by <c>ref</c> but never reassigned here.
        /// </param>
        private void GetPeople(string skill, int index, int totalCount, ref List<IPeople> people)
        {
            var baseUrl = "https://www.quirky.com/api/v1/users/by_skill?skill={0}&paginated_options%5Busers%5D%5Bpage%5D={1}&paginated_options%5Busers%5D%5Bper_page%5D=20&paginated_options%5Busers%5D%5Border_column%5D=id&paginated_options%5Busers%5D%5Border%5D=asc";

            // Escape the skill so characters such as '&', '#' or spaces cannot corrupt the query string.
            var escapedSkill = System.Uri.EscapeDataString(skill ?? string.Empty);

            var hasMore = true;
            var page = 1;
            var scrapedCount = 0;

            while (hasMore)
            {
                var url = string.Format(baseUrl, escapedSkill, page++);
                var json = Helper.GetXHRJson(url);
                if (json == null) break;    // No response for this page: stop paging instead of failing on null

                var jsonObj = json.FromJson<JObject>();

                var stats = jsonObj["paginated_meta"]["users"];
                hasMore = stats.Value<bool>("has_next_page");
                var total = stats.Value<int>("total");  // The number of people with this skill

                // Get the users on this page
                var users = jsonObj["data"].Value<JArray>("users");
                scrapedCount += users.Count;
                foreach (var user in users)
                {
                    var personName = user.Value<string>("name");
                    var personUrl = string.Format(PeopleScraper.USER_URL_FORMAT, user.Value<string>("id"));

                    // Reuse an already known person (same name and URL) so skills accumulate on one object
                    var person = people.FirstOrDefault(x => x.Name == personName && x.URL == personUrl);
                    if (person == null)
                    {
                        person = new People
                        {
                            Name = personName,
                            URL = personUrl
                        };
                        people.Add(person);
                    }
                    person.AddSkill(skill);
                }

                ReportProgress(index, totalCount,
                    string.Format("Scraping people with {0} skill... Scraped: {1}/{2}. Progress: {3}/{4}", skill, scrapedCount, total, index, totalCount));
            }

            ReportProgress(index + 1, totalCount,
                string.Format("Completed scraping people with {0} skill. Scraped: {1}. Progress: {2}/{3}", skill, scrapedCount, index + 1, totalCount));
        }
Beispiel #4
0
 /// <summary>
 /// Creates a project from its Amazon detail, its inventor and its categories.
 /// </summary>
 /// <param name="amazonDetail">Value stored in <c>AmazonDetail</c>.</param>
 /// <param name="inventor">Value stored in <c>Inventor</c>.</param>
 /// <param name="categories">Value stored in <c>Categories</c>; the sequence is kept as passed, not copied.</param>
 public Project(AmazonDetail amazonDetail, People inventor, IEnumerable<Category> categories)
 {
     this.AmazonDetail = amazonDetail;
     this.Inventor = inventor;
     this.Categories = categories;
 }
        /// <summary>
        /// Pages through a user's followers or followings and adds each returned user to <paramref name="person"/>.
        /// </summary>
        /// <param name="index">Position of this person in the caller's run; used only for progress reporting.</param>
        /// <param name="totalCount">Total used as the denominator in progress reporting.</param>
        /// <param name="person">
        /// Person that receives the followers/followings. It is passed by <c>ref</c> but never reassigned here.
        /// </param>
        /// <param name="isFollower">
        /// <c>true</c> to request the "followers" page and call <c>AddFollower</c>; <c>false</c> to request the
        /// "following" page and call <c>AddFollowing</c>.
        /// </param>
        /// <param name="personId">Identifier placed in the profile URL.</param>
        /// <param name="count">Expected number of entries; used only in the progress text.</param>
        private void Populate(int index, int totalCount, ref IPeople person, bool isFollower, string personId, int count)
        {
            var reportText = isFollower ? "followers" : "followings";

            var urlpage = isFollower ? "followers" : "following";
            var urlBase = "https://www.quirky.com/api/v1/user_profile/{0}/{1}?paginated_options%5Bfollows%5D%5Buse_cursor%5D=true&paginated_options%5Bfollows%5D%5Bper_page%5D=20&paginated_options%5Bfollows%5D%5Border_column%5D=created_at&paginated_options%5Bfollows%5D%5Border%5D=desc";
            var baseUrl = string.Format(urlBase, personId, urlpage);
            var urlCursorAddition = "&paginated_options%5Bfollows%5D%5Bcursor%5D={0}";

            var hasMore = true;
            var firstIteration = true;

            string cursor = null;
            var scrapedCount = 0;

            while (hasMore)
            {
                var url = baseUrl;
                if (firstIteration) firstIteration = false; // First iteration has no pagination cursor
                else url += string.Format(urlCursorAddition, cursor);

                var json = Helper.GetXHRJson(url);
                if (json == null) break;    // No response for this page: stop paging instead of throwing

                var jsonObj = JsonConvert.DeserializeObject(json) as JObject;
                if (jsonObj == null) break; // Response was not a JSON object

                hasMore = jsonObj["paginated_meta"]["follows"].Value<bool>("has_next_page");
                var arr = jsonObj["data"].Value<JArray>("follows");
                scrapedCount += arr.Count;

                if (hasMore)
                {
                    // The next page is addressed by the creation date of the last entry on this page.
                    // Last is null for an empty page, in which case paging cannot continue.
                    var lastFollow = arr.Last;
                    cursor = lastFollow == null ? null : lastFollow.Value<string>("created_at");
                    cursor = lastFollow == null ? null : Helper.EncodeQuirkyDate(cursor);
                    if (cursor == null) hasMore = false;
                }

                // Get the followers/followings on this page
                var users = jsonObj["data"].Value<JArray>("users");
                foreach (var user in users)
                {
                    var personName = user.Value<string>("name");
                    var personUrl = string.Format(PeopleScraper.USER_URL_FORMAT, user.Value<string>("id"));

                    var fellow = new People
                    {
                        Name = personName,
                        URL = personUrl
                    };
                    if (isFollower)
                        person.AddFollower(fellow);
                    else
                        person.AddFollowing(fellow);
                }

                ReportProgress(index, totalCount,
                    string.Format("Scraping {0}'s {1}... Scraped: {2}/{3} {1}. Progress: {4}/{5}", person.Name, reportText, scrapedCount, count, index, totalCount));
            }

            ReportProgress(index + 1, totalCount,
                string.Format("Completed scraping {0}'s {1}. Scraped: {2}/{3} {1}. Progress: {4}/{5}", person.Name, reportText, scrapedCount, count, index + 1, totalCount));
        }
Beispiel #6
0
        /// <summary>
        /// Scrapes the contributors of every category in <c>this.categories</c> and returns them.
        /// </summary>
        /// <remarks>
        /// Categories are first re-ordered by project. Categories with no contributions, or whose invention
        /// request returns nothing, are skipped. A person seen in several categories (same name and URL) is
        /// returned once with one contribution per sighting. A message box is shown when scraping finishes.
        /// </remarks>
        /// <returns>The list of scraped contributors.</returns>
        public IEnumerable<object> Scrape()
        {
            var contributors = new List<IPeople>();
            var totalCount = categories.Count;
            var progress = 0;
            ReportProgress(progress, "Starting people scraping...");

            this.categories = this.categories.OrderBy(x => x.Project).ToList();

            for (var i = 0; i < this.categories.Count; i++)
            {
                var category = categories[i];
                ReportProgress(progress, totalCount,
                    string.Format("Scraping category: {0} ({1})... Contributions: {2} Progress: {3}/{4}", category.Name, category.Project, category.ContributionNum, i, this.categories.Count));

                if (category.ContributionNum == 0) continue;    // Nothing to do here
                var addCategory = new Category
                {
                    Name = category.Name,
                    Project = category.Project,
                    URL = category.URL
                };

                // The invention id is the number following "/invent/" in the category URL
                var contributionsString = "https://www.quirky.com/api/v1/inventions/{0}/with_build_interface_objects?with_random_contributions=true";
                var projectId = Regex.Match(category.URL, "(?<=/invent/)[0-9]+(?=/)").Value;

                var json = Helper.GetXHRJson(string.Format(contributionsString, projectId));
                if (json == null) continue;

                var jsonObj = JsonConvert.DeserializeObject(json) as JObject;
                if (jsonObj == null) continue;  // Response was not a JSON object

                var scrapeCount = 0;

                // Find the project entry whose human readable name matches this category
                var catDetails = jsonObj["data"]["projects"].FirstOrDefault(x => x.Value<string>("human_name") == category.Name);
                if (catDetails != null)
                {
                    var catId = catDetails.Value<long>("id");
                    var contributionString = "https://www.quirky.com/api/v1/contributions/for_project?parent_id={0}&parent_class=Project&paginated_options%5Bcontributions%5D%5Buse_cursor%5D=true&paginated_options%5Bcontributions%5D%5Bper_page%5D=20&paginated_options%5Bcontributions%5D%5Border_column%5D=created_at&paginated_options%5Bcontributions%5D%5Border%5D=desc";
                    var baseUrl = string.Format(contributionString, catId);

                    json = Helper.GetXHRJson(baseUrl);
                    var additional = "&paginated_options%5Bcontributions%5D%5Bcursor%5D={0}";

                    var hasMore = true;
                    while (hasMore)
                    {
                        // A page that could not be fetched or parsed ends this category instead of throwing
                        if (json == null) break;
                        jsonObj = JsonConvert.DeserializeObject(json) as JObject;
                        if (jsonObj == null) break;

                        hasMore = jsonObj["paginated_meta"]["contributions"].Value<bool>("has_next_page");
                        var arr = jsonObj["data"].Value<JArray>("contributions");
                        scrapeCount += arr.Count;

                        // Only build the next page URL when there is a next page. The cursor is the creation
                        // date of the last contribution; Last is null for an empty page, which ends paging.
                        string url = null;
                        if (hasMore)
                        {
                            var lastContribution = arr.Last;
                            var cursor = lastContribution == null
                                ? null
                                : Helper.EncodeQuirkyDate(lastContribution.Value<string>("created_at"));
                            if (cursor == null) hasMore = false;
                            else url = baseUrl + string.Format(additional, cursor);
                        }

                        // Get contributors
                        var users = jsonObj["data"].Value<JArray>("users");
                        foreach (var user in users)
                        {
                            var personName = user.Value<string>("name");
                            var personUrl = string.Format(PeopleScraper.USER_URL_FORMAT, user.Value<string>("id"));

                            // Reuse a known contributor so all contributions end up on one object
                            var person = contributors.FirstOrDefault(x => x.Name == personName && x.URL == personUrl);
                            if (person == null)
                            {
                                person = new People
                                {
                                    Name = personName,
                                    URL = personUrl
                                };
                                contributors.Add(person);
                            }
                            person.AddContribution(addCategory);
                        }

                        ReportProgress(progress, totalCount,
                            string.Format("Scraping category: {0} ({1})... Scraped: {2}/{3} Progress: {4}/{5}", category.Name, category.Project, scrapeCount, category.ContributionNum, i, this.categories.Count));

                        if (hasMore)
                        {
                            json = Helper.GetXHRJson(url);
                        }
                    }
                }

                // This category is done, so the textual progress counts it as well (i + 1)
                ReportProgress(++progress, totalCount,
                    string.Format("Completed scraping category: {0} ({1}). Scraped: {2}/{3} Progress: {4}/{5}", category.Name, category.Project, scrapeCount, category.ContributionNum, i + 1, this.categories.Count));
            }

            MessageBox.Show("People scraping completed...");
            return contributors;
        }