/// <summary>
/// Collects the users returned by a paginated /users/ endpoint of the Scratch API,
/// for example the followers of a user or the users that user is following.
/// Pages of 40 entries are requested one after another until the API returns an
/// empty response or an empty array.
/// </summary>
/// <param name="userName">The first parameter to the /users/ API endpoint.</param>
/// <param name="route">The route modifier. Either "followers" or "following".</param>
/// <returns>A JsonArray holding the value of every entry of every retrieved page, in retrieval order.</returns>
private JsonArray GetAllFollowersOrFollowingsByUsername(string userName, string route)
{
    const int pageSize = 40;
    string urlTemplate = "https://api.scratch.mit.edu/users/" + userName + "/" + route + "?limit=40&offset={0}";
    JsonArray collected = new JsonArray();

    for (int offset = 0; ; offset += pageSize)
    {
        string pageJson = JSONGetter.GetAsJSONString(string.Format(urlTemplate, offset));
        if (string.IsNullOrEmpty(pageJson))
        {
            // Nothing came back for this page: stop paging.
            break;
        }

        var page = JsonValue.Parse(pageJson);
        if (page.Count == 0)
        {
            // An empty array marks the end of the data.
            break;
        }

        foreach (var entry in page)
        {
            collected.Add(entry.Value);
        }
    }

    return collected;
}
/// <summary>
/// Creates a new GameObject named "JSONGetter", attaches a JSONGetter component to it
/// and returns that component.
/// </summary>
/// <returns>The JSONGetter component attached to the newly created GameObject.</returns>
public static JSONGetter GetJSON()
{
    GameObject host = new GameObject("JSONGetter");

    // AddComponent hands back the component it just attached, so no separate lookup is needed.
    return host.AddComponent<JSONGetter>();
}
/// <summary>
/// Stores the text of the player-name field on the WKSigleton instance, copies it onto the
/// _CreateName of a JSONGetter obtained from JSONGetter.GetJSON(), and starts parsing
/// with the JSON_PLAYERNAME type and the JSONAction callback.
/// </summary>
private void OnSubmitAction()
{
    WKSigleton.Instance.GetPlayerName = _PlayerName.text;

    JSONGetter getter = JSONGetter.GetJSON();
    getter._CreateName.name = _PlayerName.text;
    getter.StartParsing(EJSONType.JSON_PLAYERNAME, JSONAction);
}
/// <summary>
/// Scrapes project authors from the explore (trending, popular, recent) and search endpoints
/// of the Scratch API. Every endpoint is walked in pages of 40 projects, starting at
/// <paramref name="skip"/> and stopping once the offset exceeds 9980.
/// A page that fails (empty response or exception) is logged and skipped; scraping then
/// continues with the next page of the same endpoint.
/// </summary>
/// <param name="skip">Offset at which scraping of every endpoint starts.</param>
public void Scrape(int skip = 0)
{
    const int pageSize = 40;
    const int maxOffset = 9980;

    string[] allURLSToConsider = new string[]
    {
        "https://api.scratch.mit.edu/explore/projects?limit=40&offset={0}&mode=trending&q=*",
        "https://api.scratch.mit.edu/explore/projects?limit=40&offset={0}&mode=popular&q=*",
        "https://api.scratch.mit.edu/explore/projects?limit=40&offset={0}&mode=recent&q=*",
        // The '&' between limit and q was missing, which turned the limit value into "40q=*".
        "https://api.scratch.mit.edu/search/projects?limit=40&q=*&offset={0}",
    };

    foreach (string baseURL in allURLSToConsider)
    {
        int offset = skip;

        // do/while: the first page is always attempted, whatever the value of skip.
        do
        {
            Console.WriteLine("Scraping at offset: " + offset.ToString());

            // The try/catch lives inside the loop so one bad page does not abandon the endpoint.
            try
            {
                ScrapeAuthorsPage(string.Format(baseURL, offset.ToString()));
            }
            catch (Exception ex)
            {
                Console.WriteLine($"Exception ocurred: {ex.Message}");
            }

            // Always advance, also after a failure; otherwise the same page is retried forever.
            offset += pageSize;
        }
        while (offset <= maxOffset);
    }
}

/// <summary>
/// Downloads one page of projects, fetches the author of every project on it,
/// writes each author to file and saves the de-duplicated authors to the database.
/// </summary>
/// <param name="pageUrl">Fully formatted URL of the page to scrape.</param>
private void ScrapeAuthorsPage(string pageUrl)
{
    string rawJson = JSONGetter.GetAsJSONString(pageUrl);
    if (string.IsNullOrEmpty(rawJson))
    {
        Console.WriteLine("\t\tGetJSON2 returned null.");
        return;
    }

    dynamic projectsObject = JsonValue.Parse(rawJson);
    List<ProjectAuthor> scrapedAuthors = new List<ProjectAuthor>();

    foreach (var projectData in projectsObject)
    {
        string authorJson = GetAuthorJson(projectData["author"]["username"].ReadAs<string>());
        if (string.IsNullOrEmpty(authorJson))
        {
            // No author data could be retrieved; skip this project instead of failing the page.
            continue;
        }

        ProjectAuthor projectAuthor = Newtonsoft.Json.JsonConvert.DeserializeObject<ProjectAuthor>(authorJson);
        if (projectAuthor == null)
        {
            continue;
        }

        scrapedAuthors.Add(projectAuthor);
        WriteAuthorToFile(projectAuthor.username, authorJson);
    }

    // An author can own several projects on one page; keep a single entry per author id.
    SaveAuthorsToDatabase(ProjectAuthorsToDatabaseEntities(scrapedAuthors.GroupBy(x => x.id).Select(y => y.First()).ToList()));
}
/// <summary>
/// Walks over the files in <paramref name="_path"/>, treating each file name (without extension)
/// as a Scratch project id. The project page is fetched for every id; for a shared project the
/// id, the counts/dates and the owner are appended to properties\properties.sb, otherwise the id
/// is appended to properties\notShared.sb. The running index of the file is printed after each file.
/// </summary>
/// <param name="_path">Directory holding the scraped Scratch files; it is concatenated directly
/// with "properties\\..." so it is expected to end with a directory separator.</param>
public static void writeProperties(string _path)
{
    // Page fragments whose values are written, in this order, after the id.
    string[] countMarkers = { "fav-count", "love-count", "icon views", "icon remix-tree", "Shared:", "Modified:" };

    FileInfo[] scrapedFiles = new DirectoryInfo(_path).GetFiles();

    for (int index = 0; index < scrapedFiles.Length; index++)
    {
        string id = Path.GetFileNameWithoutExtension(scrapedFiles[index].Name);

        // A throw-away query string is appended so that a cached (stale) copy of the page is not used.
        string projectURL = @"https://scratch.mit.edu/projects/" + id + "/?x=" + DateTime.Now.ToString();
        var HTML = JSONGetter.GetJSON(projectURL);

        if (HTML != null)
        {
            if (!isShared(HTML))
            {
                string notSharedPath = _path + "properties\\notShared.sb";
                JSONGetter.writeStringToFile(id, notSharedPath, true, true);
            }
            else
            {
                string propertiesPath = _path + "properties\\properties.sb";
                JSONGetter.writeStringToFile(id + ",", propertiesPath, true, false);

                foreach (string marker in countMarkers)
                {
                    FindCountandWritetoFile(HTML, marker, propertiesPath);
                }

                FindUserWritetoFile(HTML, propertiesPath);
            }
        }

        Console.WriteLine(index.ToString());
    }
}
/// <summary>
/// Looks for the element marked with id="owner in the page, takes the text between that marker
/// (plus two further characters) and the next closing span tag, removes all white space from it
/// and writes the result to the properties file.
/// Nothing is written when the marker is absent or when no closing span tag follows the value.
/// </summary>
/// <param name="HTML">Source of the project page.</param>
/// <param name="pathForProperties">File the owner name is written to.</param>
private static void FindUserWritetoFile(string HTML, string pathForProperties)
{
    const string toFind = "id=\"owner";

    int found = HTML.IndexOf(toFind, StringComparison.Ordinal);
    if (found == -1)
    {
        return;
    }

    // The value starts two characters after the marker (presumably the closing quote and '>' - verify against the page markup).
    int valueStart = found + toFind.Length + 2;
    int endofSpan = HTML.IndexOf("</span>", found, StringComparison.Ordinal);

    // Covers both "no closing tag" (-1) and a closing tag before the value start;
    // either would give Substring a negative length and throw.
    if (endofSpan < valueStart)
    {
        return;
    }

    string item = HTML.Substring(valueStart, endofSpan - valueStart);

    // Split on a null separator splits on every white-space character, so spaces, tabs,
    // '\n' and the '\r' of CRLF pages are all removed when the pieces are joined again.
    string itemNoSpaces = string.Concat(item.Split((char[])null));

    // NOTE(review): the two boolean flags are passed unchanged; their meaning is defined by JSONGetter.writeStringToFile.
    JSONGetter.writeStringToFile(itemNoSpaces, pathForProperties, true, true);
}
/// <summary>
/// Looks for <paramref name="toFind"/> in the page, takes the text between that marker (plus two
/// further characters) and the next closing span tag, removes all white space from it and writes
/// it, followed by a comma, to the properties file. An empty value is written as "0,".
/// Nothing is written when the marker is absent or when no closing span tag follows the value.
/// </summary>
/// <param name="HTML">Source of the project page.</param>
/// <param name="toFind">Marker text that precedes the value to extract.</param>
/// <param name="pathForProperties">File the value is written to.</param>
private static void FindCountandWritetoFile(string HTML, string toFind, string pathForProperties)
{
    int found = HTML.IndexOf(toFind, StringComparison.Ordinal);
    if (found == -1)
    {
        return;
    }

    // The value starts two characters after the marker.
    int valueStart = found + toFind.Length + 2;
    int endofSpan = HTML.IndexOf("</span>", found, StringComparison.Ordinal);

    // Covers both "no closing tag" (-1) and a closing tag before the value start;
    // either would give Substring a negative length and throw.
    if (endofSpan < valueStart)
    {
        return;
    }

    string item = HTML.Substring(valueStart, endofSpan - valueStart);

    // Split on a null separator splits on every white-space character, so spaces, tabs,
    // '\n' and the '\r' of CRLF pages are all removed when the pieces are joined again.
    string itemNoSpacesandComma = string.Concat(item.Split((char[])null)) + ",";
    if (itemNoSpacesandComma == ",")
    {
        // An empty element is recorded as a count of zero.
        itemNoSpacesandComma = "0,";
    }

    // NOTE(review): the two boolean flags are passed unchanged; their meaning is defined by JSONGetter.writeStringToFile.
    JSONGetter.writeStringToFile(itemNoSpacesandComma, pathForProperties, true, false);
}
/// <summary>
/// Recursively flattens a (nested) array of scripts into a list of comma-separated lines.
/// Each line starts with the scope type, the quoted scope name and the indent, followed by the
/// order number and the primitives found at that level. Nested arrays are flattened with
/// an indent one higher; procedure definitions are additionally written to output\procedures.csv
/// and change the scope for the blocks that follow.
/// </summary>
/// <param name="order">Running order number; incremented for every line that receives one.</param>
/// <param name="scripts">The array to flatten.</param>
/// <param name="scopeType">Type of the current scope; set to "procDef" when a procedure definition is found.</param>
/// <param name="scopeName">Name of the current scope; wrapped in quotes when it is not quoted yet.</param>
/// <param name="indent">Nesting depth of <paramref name="scripts"/>.</param>
/// <param name="path">Base directory; "output\\procedures.csv" is appended to it directly.</param>
/// <param name="id">Project id written in front of every procedure definition.</param>
/// <param name="maxIndent">Receives the deepest indent reached so far.</param>
/// <returns>The lines of all nested levels first, followed by the line of this level (if it has content).</returns>
static ArrayList flatten(ref int order, JsonArray scripts, ref string scopeType, ref string scopeName, ref int indent, string path, string id, ref int maxIndent)
{
    var result = new ArrayList();

    // Not in quotes? Add them. An empty (or null) name becomes "" instead of throwing on scopeName[0].
    if (string.IsNullOrEmpty(scopeName) || scopeName[0] != '"')
    {
        scopeName = "\"" + scopeName + "\"";
    }

    // By default we add the type of the scope (scene, sprite, or proc), the name of the scope and the indent.
    string toPrint = scopeType + "," + scopeName + "," + indent.ToString();

    bool added = false;
    bool addOrder = true;

    foreach (var innerScript in scripts)
    {
        // If the script is primitive, we just print it.
        if (innerScript is JsonPrimitive)
        {
            if (addOrder)
            {
                toPrint += "," + order + "," + innerScript;
                order = order + 1;
                addOrder = false;
            }
            else
            {
                toPrint += "," + innerScript;
            }

            // There could be more primitives (arguments), so the line is only added at the end.
            added = true;
        }

        if (innerScript is JsonArray)
        {
            if (AllOneField((JsonArray)innerScript))
            {
                if (innerScript.Count == 0)
                {
                    // This is an empty array.
                    if (addOrder)
                    {
                        toPrint += "," + order + ",[]";
                        order = order + 1;
                        addOrder = false;
                    }
                    else
                    {
                        toPrint += ",[]";
                    }
                }
                else
                {
                    int j = indent + 1;
                    if (j > maxIndent)
                    {
                        maxIndent = j;
                    }

                    // path and id are passed in signature order; they used to be swapped here, which
                    // sent nested procedure definitions to the wrong file with the wrong id column.
                    foreach (var item in flatten(ref order, (JsonArray)innerScript, ref scopeType, ref scopeName, ref j, path, id, ref maxIndent))
                    {
                        result.Add(item);
                    }
                }
            }
            else
            {
                if (innerScript.Count > 0 && innerScript[0].ToString() == "\"procDef\"")
                {
                    // First save this definition to a separate file:
                    // procdef plus name of the proc plus number of arguments.
                    string procdef = id + "," + scopeName + ",procDef," + innerScript[1].ToString() + "," + innerScript[2].Count.ToString();
                    JSONGetter.writeStringToFile(procdef, path + "output\\procedures.csv", true);

                    toPrint += ",procdef";

                    // Now set the other blocks to the scope of this proc.
                    scopeType = "procDef";
                    scopeName = innerScript[1].ToString();
                    added = true;
                }
                else
                {
                    int j = indent + 1;
                    if (j > maxIndent)
                    {
                        maxIndent = j;
                    }

                    // Same argument order fix as above: path first, then id.
                    foreach (var item in flatten(ref order, (JsonArray)innerScript, ref scopeType, ref scopeName, ref j, path, id, ref maxIndent))
                    {
                        result.Add(item);
                    }
                }
            }
        }
    }

    if (added)
    {
        result.Add(toPrint);
    }

    return result;
}
/// <summary>
/// Registers the projects listed in UnregisteredProjects.txt (located in
/// <paramref name="pathToUnregisteredProjects"/>) in the database. For every listed id that is
/// numeric and not yet in the database, the project info is fetched from the Scratch API.
/// When the author is known, the project file is downloaded if it is not already in the
/// directory and the project is stored; otherwise the unknown author is reported.
/// Ids whose project info cannot be retrieved, or has no author, are reported and skipped.
/// </summary>
/// <param name="pathToUnregisteredProjects">Directory holding UnregisteredProjects.txt and the project files.</param>
/// <param name="p">Scraper used to download and parse projects.</param>
internal static void ProcessUnregisteredProjects(string pathToUnregisteredProjects, ProjectScraper p)
{
    string[] unregisteredProjectIds;
    Dictionary<string, string> projectCache;

    // Progress timer with a 20 second (20000 ms) interval. It is restarted after every phase
    // and disposed as soon as the slow enumeration work is done.
    using (System.Timers.Timer aTimer = new System.Timers.Timer(20000))
    {
        aTimer.Elapsed += (source, e) => { Say("Not done yet.."); };
        aTimer.AutoReset = true;
        aTimer.Enabled = true;

        string listFile = Path.Combine(pathToUnregisteredProjects, "UnregisteredProjects.txt");
        Say($"Enumerating unregistered project files in {listFile}.");
        unregisteredProjectIds = File.ReadAllLines(listFile).Distinct<string>().ToArray();
        Say($"Enumerating unregistered project files done");
        aTimer.Stop();
        aTimer.Start();

        Say($"Enumerating existing project files in {pathToUnregisteredProjects}. This could take a very long time...");
        string[] fileNames = Directory.GetFiles(pathToUnregisteredProjects).Select(o => Path.GetFileName(o)).ToArray();
        Say($"Enumerating existing project files done.");
        aTimer.Stop();
        aTimer.Start();

        Say($"Creating projects cache. This could take a very long time...");
        projectCache = BuildProjectFileCache(fileNames);
        Say($"Creating projects cache done.");
    }

    // fileNames is out of scope here; collect so the (potentially millions of) strings do not linger.
    GC.Collect();

    using (ApplicationDatabase ctxt = new ApplicationDatabase())
    {
        foreach (string projectId in unregisteredProjectIds)
        {
            if (!Int32.TryParse(projectId, out int projectIdAsInt))
            {
                continue;
            }

            if (ctxt.Projects.AsNoTracking().Any(o => o.Id == projectIdAsInt))
            {
                continue;
            }

            string baseUrl = "https://api.scratch.mit.edu/projects/{0}";
            string projectInfoJson = JSONGetter.GetAsJSONString(string.Format(baseUrl, projectId));
            if (string.IsNullOrEmpty(projectInfoJson))
            {
                // Without this guard JObject.Parse throws and the whole run is aborted.
                Say($"No project info could be retrieved for project: {projectId}");
                continue;
            }

            JObject projectObject = JObject.Parse(projectInfoJson);
            string authorJson = projectObject["author"]?.ToString();
            ProjectAuthor author = string.IsNullOrEmpty(authorJson)
                ? null
                : Newtonsoft.Json.JsonConvert.DeserializeObject<ProjectAuthor>(authorJson);
            if (author == null)
            {
                Say($"Project info without author for project: {projectId}");
                continue;
            }

            if (ctxt.Authors.AsNoTracking().Any(o => o.Id == author.id)) //If the author is known...
            {
                //Validate if it exists as a file...
                projectCache.TryGetValue(projectId, out string fileExtension);
                if (string.IsNullOrEmpty(fileExtension))
                {
                    p.DownloadProjectToFile(projectId);
                }

                Project newProject = p.ParseProject(projectInfoJson, false);
                newProject.AuthorId = author.id;
                ctxt.Projects.Add(newProject);
                ctxt.SaveChanges();
                //TODO: Optionally immediately parse the actual project and its blocks.
            }
            else
            {
                Say($"Found project from unknown author: {author.id}");
            }

            //This way, the cache will immediately get rid of now useless entries
            projectCache.Remove(projectId);
        }
    }
}

/// <summary>
/// Maps the part of every file name before its first dot to the remainder of the name (dot included).
/// File names without a dot are skipped; when several files share the same prefix the last one wins.
/// </summary>
/// <param name="fileNames">File names (without directory) to index.</param>
private static Dictionary<string, string> BuildProjectFileCache(string[] fileNames)
{
    Dictionary<string, string> cache = new Dictionary<string, string>(fileNames.Length);

    foreach (string fileName in fileNames)
    {
        int firstDot = fileName.IndexOf('.');
        if (firstDot < 0)
        {
            // No extension: Substring(0, -1) would throw, and such a file is not a project file with an extension.
            continue;
        }

        // Indexer assignment tolerates duplicate prefixes (e.g. "1.sb" and "1.sb2"); Add/ToDictionary would throw.
        cache[fileName.Substring(0, firstDot)] = fileName.Substring(firstDot);
    }

    return cache;
}
/// <summary>
/// Requests the /users/ endpoint of the Scratch API for the given user name.
/// </summary>
/// <param name="userName">User name appended to the /users/ route.</param>
/// <returns>Whatever JSONGetter.GetAsJSONString returns for that endpoint.</returns>
public string GetAuthorJson(string userName)
    => JSONGetter.GetAsJSONString($"https://api.scratch.mit.edu/users/{userName}");
/// <summary>
/// Retrieves the published projects of a user from the Scratch API, in pages of 40,
/// until the API returns an empty response or an empty array.
/// Unpublished projects are logged and skipped. When an exception occurs, its message is
/// logged and the projects collected so far are returned.
/// </summary>
/// <param name="userName">User whose projects are requested.</param>
/// <param name="ignoreRemixes">When true, projects that have a remix root or remix parent are skipped.</param>
/// <returns>The projects collected; AuthorId and Author are left unset (0 / null).</returns>
public List<Project> GetProjectsByUsername(string userName, bool ignoreRemixes = false)
{
    string apiEndpoint = "https://api.scratch.mit.edu/users/" + userName + "/projects?limit=40&offset={0}";
    int offset = 0;
    List<Project> allProjectsOfUser = new List<Project>();

    try
    {
        while (true)
        {
            string returnedProjects = JSONGetter.GetAsJSONString(string.Format(apiEndpoint, offset));
            if (string.IsNullOrEmpty(returnedProjects))
            {
                break;
            }

            JArray parsedProjects = JArray.Parse(returnedProjects);
            if (parsedProjects.Count == 0)
            {
                // An empty page marks the end of the data.
                break;
            }

            foreach (JToken project in parsedProjects)
            {
                // The element already is a parsed object; no need to serialize and parse it again.
                JObject projectObject = (JObject)project;

                // "remix" or its members may be missing or JSON null; a null token yields an empty string.
                JObject remixObject = projectObject["remix"] as JObject;
                string remixRootText = remixObject?["root"]?.ToString();
                string remixParentText = remixObject?["parent"]?.ToString();

                bool isRemixed = false;
                int remixParent = 0;
                int remixRoot = 0;

                if (!string.IsNullOrEmpty(remixRootText)) //Check if this is a remixed project
                {
                    isRemixed = true;
                    // Parse the token text; this used to parse the local itself, leaving the root at 0.
                    remixRoot = Int32.Parse(remixRootText);
                    if (ignoreRemixes)
                    {
                        continue;
                    }
                }

                if (!string.IsNullOrEmpty(remixParentText))
                {
                    remixParent = Int32.Parse(remixParentText);
                    if (ignoreRemixes)
                    {
                        continue;
                    }
                }

                if (projectObject["is_published"].Value<bool>() == false) //Check if the project is published (not private)
                {
                    Console.WriteLine($"P: {projectObject["id"]}");
                    continue;
                }

                ProjectStats projectStats = Newtonsoft.Json.JsonConvert.DeserializeObject<ProjectStats>(projectObject["stats"].ToString());
                ProjectHistory projectHistory = Newtonsoft.Json.JsonConvert.DeserializeObject<ProjectHistory>(projectObject["history"].ToString());

                Project toAdd = new Project
                {
                    Id = Int32.Parse(projectObject["id"].ToString()),
                    ProjectName = projectObject["title"].ToString(),
                    AuthorId = 0,
                    Author = null,
                    Created = projectHistory.created,
                    Modified = projectHistory.modified,
                    TotalViews = projectStats.views,
                    TotalFavorites = projectStats.favorites,
                    TotalLoves = projectStats.loves,
                    // The null test was inverted: it cast the value when it was null and dropped it otherwise.
                    Shared = projectHistory.shared != null ? (DateTime)projectHistory.shared : DateTime.MinValue,
                    IsRemix = isRemixed,
                    RemixParent = remixParent,
                    RemixRoot = remixRoot
                };
                allProjectsOfUser.Add(toAdd);
            }

            offset += 40;
        }
    }
    catch (Exception ex)
    {
        // Best effort: report the problem and hand back what was collected so far.
        Console.WriteLine(ex.Message);
    }

    return allProjectsOfUser;
}