Example #1
0
        /// <summary>
        /// Pages through a /users/{userName}/{route} endpoint of the Scratch API, 40 entries per request,
        /// and gathers the entries of every page into a single array.
        /// Examples are the users following a specific user, or the users that specific user is following.
        /// </summary>
        /// <param name="userName">The first parameter to the /users/ API endpoint.</param>
        /// <param name="route">The route modifier. Either "followers" or "following".</param>
        /// <returns>
        /// Every entry collected so far. Collection ends at the first page that is empty
        /// or for which no content could be retrieved.
        /// </returns>
        private JsonArray GetAllFollowersOrFollowingsByUsername(string userName, string route)
        {
            string    urlTemplate = "https://api.scratch.mit.edu/users/" + userName + "/" + route + "?limit=40&offset={0}";
            JsonArray collected   = new JsonArray();

            // The loop has no exit condition of its own; it ends through one of the returns below.
            for (int pageOffset = 0; ; pageOffset += 40)
            {
                string pageJson = JSONGetter.GetAsJSONString(string.Format(urlTemplate, pageOffset));
                if (string.IsNullOrEmpty(pageJson))
                {
                    return collected;
                }

                var page = JsonValue.Parse(pageJson);
                if (page.Count == 0)
                {
                    // An empty page means the previous page was the last one.
                    return collected;
                }

                foreach (var entry in page)
                {
                    collected.Add(entry.Value);
                }
            }
        }
Example #2
0
    /// <summary>
    /// Creates a new GameObject named "JSONGetter" and attaches a JSONGetter component to it.
    /// </summary>
    /// <returns>The JSONGetter component attached to the newly created GameObject.</returns>
    public static JSONGetter GetJSON()
    {
        GameObject host = new GameObject("JSONGetter");

        // AddComponent<T>() returns the component it just attached, so a follow-up
        // GetComponent<T>() lookup and "as" cast are unnecessary.
        return host.AddComponent<JSONGetter>();
    }
    /// <summary>
    /// Copies the text of _PlayerName into the WKSigleton instance and into a freshly created
    /// JSONGetter, then starts a JSON_PLAYERNAME parse on that getter with JSONAction as the callback argument.
    /// </summary>
    private void OnSubmitAction()
    {
        WKSigleton.Instance.GetPlayerName = _PlayerName.text;

        JSONGetter nameRequest = JSONGetter.GetJSON();
        nameRequest._CreateName.name = _PlayerName.text;

        nameRequest.StartParsing(EJSONType.JSON_PLAYERNAME, JSONAction);
    }
Example #4
0
        /// <summary>
        /// Walks the explore (trending, popular, recent) and search project listings of the Scratch API
        /// page by page, fetches the author of every listed project, writes each author to file and
        /// saves the distinct authors of each page to the database.
        /// </summary>
        /// <param name="skip">The offset at which to start in every listing.</param>
        public void Scrape(int skip = 0)
        {
            const int pageSize  = 40;   // matches the limit=40 query parameter in the URLs below
            const int maxOffset = 9980; // scraping of a listing stops once the offset exceeds this value

            string[] allURLSToConsider = new string[]
            {
                "https://api.scratch.mit.edu/explore/projects?limit=40&offset={0}&mode=trending&q=*",
                "https://api.scratch.mit.edu/explore/projects?limit=40&offset={0}&mode=popular&q=*",
                "https://api.scratch.mit.edu/explore/projects?limit=40&offset={0}&mode=recent&q=*",
                // The '&' between limit and q was missing, which fused both into one malformed parameter.
                "https://api.scratch.mit.edu/search/projects?limit=40&q=*&offset={0}",
            };

            foreach (string baseURL in allURLSToConsider)
            {
                int  offset       = skip;
                bool stopScraping = false;

                while (!stopScraping)
                {
                    Console.WriteLine("Scraping at offset: " + offset.ToString());

                    // The try/catch lives inside the loop so that a failing page is skipped
                    // instead of abandoning the remainder of this listing.
                    try
                    {
                        ScrapeAuthorsFromPage(string.Format(baseURL, offset.ToString()));
                    }
                    catch (Exception ex)
                    {
                        Console.WriteLine($"Exception ocurred: {ex.Message}");
                    }

                    // Always advance, also after an empty response or an exception;
                    // otherwise the same page would be requested forever.
                    offset      += pageSize;
                    stopScraping = offset > maxOffset;
                }
            }
        }

        /// <summary>
        /// Scrapes a single listing page: resolves the author of every project on the page,
        /// writes each author to file and saves the distinct authors to the database.
        /// </summary>
        /// <param name="pageUrl">The fully formatted URL of the listing page.</param>
        private void ScrapeAuthorsFromPage(string pageUrl)
        {
            string rawJson = JSONGetter.GetAsJSONString(pageUrl);
            if (string.IsNullOrEmpty(rawJson))
            {
                Console.WriteLine("\t\tGetJSON2 returned null.");
                return;
            }

            dynamic projectsObject = JsonValue.Parse(rawJson);
            List<ProjectAuthor> scrapedAuthors = new List<ProjectAuthor>();
            foreach (var projectData in projectsObject)
            {
                string authorJson = GetAuthorJson(projectData["author"]["username"].ReadAs<string>());
                if (string.IsNullOrEmpty(authorJson))
                {
                    // No author data could be retrieved; skip this project but keep the rest of the page.
                    continue;
                }

                ProjectAuthor projectAuthor = Newtonsoft.Json.JsonConvert.DeserializeObject<ProjectAuthor>(authorJson);
                if (projectAuthor == null)
                {
                    continue;
                }

                scrapedAuthors.Add(projectAuthor);
                WriteAuthorToFile(projectAuthor.username, authorJson);
            }

            // The same author can appear several times on one page; keep one entry per author id.
            SaveAuthorsToDatabase(ProjectAuthorsToDatabaseEntities(scrapedAuthors.GroupBy(x => x.id).Select(y => y.First()).ToList()));
        }
Example #5
0
        /// <summary>
        /// For every file in <paramref name="_path"/>, treats the file name (without extension) as a Scratch
        /// project id, downloads the project page and records its properties.
        /// Properties of shared projects are appended to properties/properties.sb; the ids of projects
        /// that are not shared are appended to properties/notShared.sb.
        /// This method assumes that a number of Scratch files have already been scraped into <paramref name="_path"/>.
        /// </summary>
        /// <param name="_path">The directory that holds the scraped project files.</param>
        public static void writeProperties(string _path)
        {
            FileInfo[] files = new DirectoryInfo(_path).GetFiles();

            // The output locations do not depend on the project, so they are computed once.
            // Path.Combine copes with a missing trailing separator on _path and with the platform's separator.
            string propertiesDirectory = Path.Combine(_path, "properties");
            string sharedPropertiesPath = Path.Combine(propertiesDirectory, "properties.sb");
            string notSharedPath = Path.Combine(propertiesDirectory, "notShared.sb");

            int processed = 0;

            foreach (FileInfo file in files)
            {
                // The file name without its extension is the project id.
                string id = Path.GetFileNameWithoutExtension(file.Name);

                // A fake query string is added to prevent the page from being served from a cache.
                // Ticks are used rather than a formatted date, because a formatted date is culture-dependent
                // and contains spaces, slashes and colons.
                string projectURL = @"https://scratch.mit.edu/projects/" + id + "/?x=" + DateTime.UtcNow.Ticks.ToString();

                var HTML = JSONGetter.GetJSON(projectURL);

                if (HTML != null)
                {
                    if (isShared(HTML))
                    {
                        JSONGetter.writeStringToFile(id + ",", sharedPropertiesPath, true, false);

                        FindCountandWritetoFile(HTML, "fav-count", sharedPropertiesPath);
                        FindCountandWritetoFile(HTML, "love-count", sharedPropertiesPath);

                        FindCountandWritetoFile(HTML, "icon views", sharedPropertiesPath);
                        FindCountandWritetoFile(HTML, "icon remix-tree", sharedPropertiesPath);

                        FindCountandWritetoFile(HTML, "Shared:", sharedPropertiesPath);
                        FindCountandWritetoFile(HTML, "Modified:", sharedPropertiesPath);

                        FindUserWritetoFile(HTML, sharedPropertiesPath);
                    }
                    else
                    {
                        JSONGetter.writeStringToFile(id, notSharedPath, true, true);
                    }
                }

                // Progress indicator: the zero-based index of the file that was just handled.
                Console.WriteLine(processed.ToString());
                processed++;
            }
        }
Example #6
0
        /// <summary>
        /// Looks for the element marked with id="owner in <paramref name="HTML"/> and writes the text between
        /// that marker and the next closing span tag, stripped of spaces, non-breaking-space entities and
        /// newlines, to the properties file. Nothing is written when the marker is absent.
        /// </summary>
        /// <param name="HTML">The HTML of a project page.</param>
        /// <param name="pathForProperties">The file to which the value is written.</param>
        private static void FindUserWritetoFile(string HTML, string pathForProperties)
        {
            const string toFind = "id=\"owner";

            // Markup is matched ordinally; culture-sensitive matching has no meaning for HTML tokens.
            int found = HTML.IndexOf(toFind, StringComparison.Ordinal);
            if (found == -1)
            {
                return;
            }

            // The value starts two characters past the marker (presumably the closing quote and '>' — verify against the page markup).
            int valueStart = found + toFind.Length + 2;
            int endofSpan  = HTML.IndexOf("</span>", found, StringComparison.Ordinal);

            // A missing closing tag, or one that sits before the expected start of the value, used to make
            // Substring throw. Such markup is now treated as an empty value.
            string item = endofSpan >= valueStart
                ? HTML.Substring(valueStart, endofSpan - valueStart)
                : string.Empty;

            string itemNoSpaces = item.Replace(" ", "").Replace("&nbsp;", "").Replace("\n", "");
            JSONGetter.writeStringToFile(itemNoSpaces, pathForProperties, true, true);
        }
Example #7
0
        /// <summary>
        /// Looks for <paramref name="toFind"/> in <paramref name="HTML"/> and writes the text between that
        /// marker and the next closing span tag, stripped of spaces, non-breaking-space entities and newlines
        /// and followed by a comma, to the properties file. An empty value is written as "0,".
        /// Nothing is written when the marker is absent.
        /// </summary>
        /// <param name="HTML">The HTML of a project page.</param>
        /// <param name="toFind">The marker text that precedes the value.</param>
        /// <param name="pathForProperties">The file to which the value is written.</param>
        private static void FindCountandWritetoFile(string HTML, string toFind, string pathForProperties)
        {
            // Markup is matched ordinally; culture-sensitive matching has no meaning for HTML tokens.
            int found = HTML.IndexOf(toFind, StringComparison.Ordinal);
            if (found == -1)
            {
                // NOTE(review): no column is written in this case, which may shift the remaining
                // comma-separated values of the record — confirm that consumers of the file expect this.
                return;
            }

            // The value starts two characters past the marker (presumably the closing quote and '>' — verify against the page markup).
            int valueStart = found + toFind.Length + 2;
            int endofSpan  = HTML.IndexOf("</span>", found, StringComparison.Ordinal);

            // A missing closing tag, or one that sits before the expected start of the value, used to make
            // Substring throw. Such markup is now treated as an empty value, which ends up as "0," below.
            string item = endofSpan >= valueStart
                ? HTML.Substring(valueStart, endofSpan - valueStart)
                : string.Empty;

            string itemNoSpacesandComma = item.Replace(" ", "").Replace("&nbsp;", "").Replace("\n", "") + ",";
            if (itemNoSpacesandComma == ",")
            {
                itemNoSpacesandComma = "0,";
            }

            JSONGetter.writeStringToFile(itemNoSpacesandComma, pathForProperties, true, false);
        }
Example #8
0
        /// <summary>
        /// Recursively flattens a nested array of scripts into a list of comma-separated lines.
        /// Each line starts with the scope type, the quoted scope name and the indent, followed by the
        /// order number and the primitive values found directly in <paramref name="scripts"/>.
        /// Lines of nested arrays are added before the line of the array that contains them.
        /// Procedure definitions ("procDef") are additionally appended to output\procedures.csv under
        /// <paramref name="path"/> and switch the scope for everything flattened afterwards.
        /// </summary>
        /// <param name="order">Running block counter; incremented once for every line that receives an order number.</param>
        /// <param name="scripts">The array to flatten.</param>
        /// <param name="scopeType">Type of the current scope; set to "procDef" when a procedure definition is met.</param>
        /// <param name="scopeName">Name of the current scope; wrapped in double quotes if it is not quoted yet.</param>
        /// <param name="indent">Nesting depth of <paramref name="scripts"/>.</param>
        /// <param name="path">Base path; procedure definitions are written to path + "output\procedures.csv".</param>
        /// <param name="id">Identifier that is put in front of every procedure definition line.</param>
        /// <param name="maxIndent">Raised to the deepest indent reached.</param>
        /// <returns>The flattened lines as strings.</returns>
        static ArrayList flatten(ref int order, JsonArray scripts, ref string scopeType, ref string scopeName, ref int indent, string path, string id, ref int maxIndent)
        {
            var result = new ArrayList();

            // Not in quotes? Add them. An empty or missing name becomes "" instead of crashing on scopeName[0].
            if (string.IsNullOrEmpty(scopeName) || scopeName[0] != '"')
            {
                scopeName = "\"" + scopeName + "\"";
            }

            // By default a line holds the type of the scope (scene, sprite, or proc), the name of the scope and the indent.
            string toPrint  = scopeType + "," + scopeName + "," + indent.ToString();
            bool   added    = false;
            bool   addOrder = true; // the order number is inserted once, in front of the first value of the line

            foreach (var innerScript in scripts)
            {
                if (innerScript is JsonPrimitive)
                {
                    // A primitive is printed as is.
                    if (addOrder)
                    {
                        toPrint += "," + order + "," + innerScript;
                        order    = order + 1;
                        addOrder = false;
                    }
                    else
                    {
                        toPrint += "," + innerScript;
                    }

                    added = true; // there may be more primitives (arguments), so the line is only added at the end
                }
                else if (innerScript is JsonArray)
                {
                    var  innerArray  = (JsonArray)innerScript;
                    bool allOneField = AllOneField(innerArray);

                    if (allOneField && innerArray.Count == 0)
                    {
                        // This is an empty array.
                        if (addOrder)
                        {
                            toPrint += "," + order + ",[]";
                            order    = order + 1;
                            addOrder = false;
                        }
                        else
                        {
                            toPrint += ",[]";
                        }
                    }
                    else if (!allOneField && innerArray.Count > 0 && innerArray[0].ToString() == "\"procDef\"")
                    {
                        // First save this definition to a separate file:
                        // procdef plus the name of the proc plus the number of arguments.
                        string procdef = id + "," + scopeName + ",procDef," + innerArray[1].ToString() + "," + innerArray[2].Count.ToString();
                        JSONGetter.writeStringToFile(procdef, path + "output\\procedures.csv", true);

                        toPrint += ",procdef";

                        // Now set the other blocks to the scope of this proc.
                        scopeType = "procDef";
                        scopeName = innerArray[1].ToString();

                        added = true;
                    }
                    else
                    {
                        int childIndent = indent + 1;
                        if (childIndent > maxIndent)
                        {
                            maxIndent = childIndent;
                        }

                        // path and id are passed in the order of the signature. They used to be swapped here,
                        // which exchanged them on every other nesting level and made nested procedure
                        // definitions go to a wrong file with a wrong id.
                        result.AddRange(flatten(ref order, innerArray, ref scopeType, ref scopeName, ref childIndent, path, id, ref maxIndent));
                    }
                }
            }

            if (added)
            {
                result.Add(toPrint);
            }

            return result;
        }
Example #9
0
        /// <summary>
        /// Registers the projects listed in UnregisteredProjects.txt that are not in the database yet.
        /// For every such project the project info is requested from the Scratch API. When the author is
        /// already known in the database, the project file is downloaded if it is not present in
        /// <paramref name="pathToUnregisteredProjects"/> and the project is saved to the database.
        /// </summary>
        /// <param name="pathToUnregisteredProjects">Directory that holds UnregisteredProjects.txt and the project files.</param>
        /// <param name="p">Scraper used to download and parse projects.</param>
        internal static void ProcessUnregisteredProjects(string pathToUnregisteredProjects, ProjectScraper p)
        {
            string[] unregisteredProjectIds;
            Dictionary<string, string> projectCache;

            // Heartbeat: reports whenever 20 seconds pass without a preparation phase completing.
            // The using block stops and disposes the timer, also when a phase throws.
            using (System.Timers.Timer aTimer = new System.Timers.Timer(20000))
            {
                aTimer.Elapsed  += (Object source, ElapsedEventArgs e) => { Say("Not done yet.."); };
                aTimer.AutoReset = true;
                aTimer.Enabled   = true;

                string listPath = Path.Combine(pathToUnregisteredProjects, "UnregisteredProjects.txt");
                Say($"Enumerating unregistered project files in {listPath}.");
                unregisteredProjectIds = File.ReadAllLines(listPath).Distinct().ToArray();
                Say($"Enumerating unregistered project files done");
                aTimer.Stop(); aTimer.Start(); // restart the interval for the next phase

                Say($"Enumerating existing project files in {pathToUnregisteredProjects}. This could take a very long time...");
                string[] fileNames = Directory.GetFiles(pathToUnregisteredProjects).Select(o => Path.GetFileName(o)).ToArray();
                Say($"Enumerating existing project files done.");
                aTimer.Stop(); aTimer.Start();

                Say($"Creating projects cache. This could take a very long time...");
                projectCache = BuildProjectFileCache(fileNames);
                Say($"Creating projects cache done.");

                // fileNames goes out of scope here, so the potentially millions of strings can be
                // reclaimed by the garbage collector without forcing a collection.
            }

            using (ApplicationDatabase ctxt = new ApplicationDatabase())
            {
                foreach (string projectId in unregisteredProjectIds)
                {
                    if (!Int32.TryParse(projectId, out int projectIdAsInt))
                    {
                        continue;
                    }
                    if (ctxt.Projects.AsNoTracking().Any(o => o.Id == projectIdAsInt))
                    {
                        continue;
                    }

                    string baseUrl         = "https://api.scratch.mit.edu/projects/{0}";
                    string projectInfoJson = JSONGetter.GetAsJSONString(string.Format(baseUrl, projectId));
                    if (string.IsNullOrEmpty(projectInfoJson))
                    {
                        // Without project info there is nothing to register; do not let one project end the run.
                        Say($"No project info returned for project: {projectId}");
                        continue;
                    }

                    JObject       projectObject = JObject.Parse(projectInfoJson);
                    var           authorToken   = projectObject["author"];
                    ProjectAuthor author        = authorToken == null
                        ? null
                        : Newtonsoft.Json.JsonConvert.DeserializeObject<ProjectAuthor>(authorToken.ToString());
                    if (author == null)
                    {
                        Say($"No author information for project: {projectId}");
                        continue;
                    }

                    if (ctxt.Authors.AsNoTracking().Any(o => o.Id == author.id))       //If the author is known...
                    {
                        projectCache.TryGetValue(projectId, out string fileExtension); //Validate if it exists as a file...
                        if (string.IsNullOrEmpty(fileExtension))
                        {
                            p.DownloadProjectToFile(projectId);
                        }

                        Project newProject = p.ParseProject(projectInfoJson, false);
                        newProject.AuthorId = author.id;
                        ctxt.Projects.Add(newProject);
                        ctxt.SaveChanges();

                        //TODO: Optionally immediately parse the actual project and its blocks.
                    }
                    else
                    {
                        Say($"Found project from unknown author: {author.id}");
                    }
                    projectCache.Remove(projectId); //This way, the cache will immediately get rid of now useless entries
                }
            }
        }

        /// <summary>
        /// Maps the part of each file name before its first dot to the remainder of the name, prefixed with a dot.
        /// Names without a dot, or starting with one, are skipped. When several files share the part before
        /// the first dot, the last one wins instead of raising a duplicate key exception.
        /// </summary>
        /// <param name="fileNames">File names without directory.</param>
        /// <returns>The lookup from name prefix to extension.</returns>
        private static Dictionary<string, string> BuildProjectFileCache(string[] fileNames)
        {
            var cache = new Dictionary<string, string>(fileNames.Length);
            foreach (string fileName in fileNames)
            {
                int dotIndex = fileName.IndexOf('.');
                if (dotIndex <= 0)
                {
                    continue;
                }
                cache[fileName.Substring(0, dotIndex)] = "." + fileName.Substring(dotIndex + 1);
            }
            return cache;
        }
Example #10
0
        /// <summary>
        /// Requests the /users/{userName} endpoint of the Scratch API.
        /// </summary>
        /// <param name="userName">The user name that is appended to the /users/ route.</param>
        /// <returns>Whatever JSONGetter.GetAsJSONString yields for that endpoint.</returns>
        public string GetAuthorJson(string userName)
        {
            return JSONGetter.GetAsJSONString($"https://api.scratch.mit.edu/users/{userName}");
        }
Example #11
0
        /// <summary>
        /// Pages through the /users/{userName}/projects endpoint of the Scratch API, 40 projects per request,
        /// and converts every published project into a <c>Project</c>.
        /// </summary>
        /// <param name="userName">The user whose projects are requested.</param>
        /// <param name="ignoreRemixes">When true, projects that have a remix root or a remix parent are left out.</param>
        /// <returns>
        /// The projects collected so far. When an exception occurs, its message is written to the console
        /// and the projects collected up to that point are returned.
        /// </returns>
        public List<Project> GetProjectsByUsername(string userName, bool ignoreRemixes = false)
        {
            string        apiEndpoint       = "https://api.scratch.mit.edu/users/" + userName + "/projects?limit=40&offset={0}";
            bool          endOfDataReached  = false;
            int           offset            = 0;
            List<Project> allProjectsOfUser = new List<Project>();

            try
            {
                while (!endOfDataReached)
                {
                    string specifiedApiEndpoint = string.Format(apiEndpoint, offset);
                    string returnedProjects     = JSONGetter.GetAsJSONString(specifiedApiEndpoint);
                    if (string.IsNullOrEmpty(returnedProjects))
                    {
                        break;
                    }
                    JArray parsedProjects = JArray.Parse(returnedProjects);
                    if (parsedProjects.Count == 0)
                    {
                        endOfDataReached = true;
                    }
                    foreach (var project in parsedProjects)
                    {
                        JObject projectObject = JObject.Parse(project.ToString());

                        // "remix" may be absent or JSON null; both are treated as "not a remix".
                        JObject remixObject     = projectObject["remix"] as JObject;
                        string  remixRootText   = remixObject?["root"]?.ToString();
                        string  remixParentText = remixObject?["parent"]?.ToString();
                        bool    hasRemixRoot    = !string.IsNullOrEmpty(remixRootText);
                        bool    hasRemixParent  = !string.IsNullOrEmpty(remixParentText);

                        if (ignoreRemixes && (hasRemixRoot || hasRemixParent))
                        {
                            continue;
                        }

                        bool isRemixed = hasRemixRoot; // a project counts as a remix when it has a remix root
                        // The root id is parsed from the token text; it used to parse its own value and was always 0.
                        int remixRoot   = hasRemixRoot ? Int32.Parse(remixRootText) : 0;
                        int remixParent = hasRemixParent ? Int32.Parse(remixParentText) : 0;

                        if (projectObject["is_published"].Value<bool>() == false) //Check if the project is published (not private)
                        {
                            Console.WriteLine($"P: {projectObject["id"]}");
                            continue;
                        }
                        ProjectStats   projectStats   = Newtonsoft.Json.JsonConvert.DeserializeObject<ProjectStats>(projectObject["stats"].ToString());
                        ProjectHistory projectHistory = Newtonsoft.Json.JsonConvert.DeserializeObject<ProjectHistory>(projectObject["history"].ToString());
                        Project        toAdd          = new Project
                        {
                            Id             = Int32.Parse(projectObject["id"].ToString()),
                            ProjectName    = projectObject["title"].ToString(),
                            AuthorId       = 0,
                            Author         = null,
                            Created        = projectHistory.created,
                            Modified       = projectHistory.modified,
                            TotalViews     = projectStats.views,
                            TotalFavorites = projectStats.favorites,
                            TotalLoves     = projectStats.loves,
                            // Use the shared date when there is one. The condition used to be inverted,
                            // which cast a null value (and threw) and discarded every real date.
                            Shared         = projectHistory.shared != null ? (DateTime)projectHistory.shared : DateTime.MinValue,
                            IsRemix        = isRemixed,
                            RemixParent    = remixParent,
                            RemixRoot      = remixRoot
                        };
                        allProjectsOfUser.Add(toAdd);
                    }

                    offset += 40;
                }
            }
            catch (Exception ex)
            {
                // Best effort: report the problem and return what has been collected so far.
                Console.WriteLine(ex.Message);
            }
            return allProjectsOfUser;
        }