/// <summary>
/// Extracts the given archive into the file store.
/// </summary>
/// <remarks>
/// The username is derived from the archive's file name without its extension. When a
/// metadata file for that user already exists and <paramref name="append"/> is false,
/// nothing is extracted. Directory entries in the archive are skipped; the directories
/// needed by file entries are created on demand.
/// </remarks>
/// <param name="archive">the archive to extract</param>
/// <param name="overwrite">true if existing files should be overwritten</param>
/// <param name="append">true if the archive should still be extracted when the user is already registered</param>
private static void Extract(FileSystemInfo archive, bool overwrite, bool append)
{
    string username = Path.GetFileNameWithoutExtension(archive.Name);
    bool userExists = FileStore.FileExists(Resources.UserDirectory, username + ".json");
    if (userExists && !append)
    {
        return;
    }

    using (ZipArchive zipArchive = ZipFile.OpenRead(archive.FullName))
    {
        foreach (ZipArchiveEntry entry in zipArchive.Entries)
        {
            // Directory entries have an empty Name (their FullName ends with a separator).
            // ExtractToFile cannot write them, so skip them instead of failing the whole archive.
            if (string.IsNullOrEmpty(entry.Name))
            {
                continue;
            }

            // NOTE(review): entry.FullName comes straight from the archive. If archives can
            // originate from an untrusted source, verify that the destination stays inside
            // the file store before extracting (path traversal via ".." segments).
            string destination = FileStore.GetAbsolutePath(entry.FullName);
            bool destinationExists = File.Exists(destination);

            if (!destinationExists)
            {
                FileStore.CreateDirectory(Path.GetDirectoryName(entry.FullName));
                entry.ExtractToFile(destination, false);
            }
            else if (overwrite)
            {
                entry.ExtractToFile(destination, true);
            }
        }
    }
}
/// <summary>
/// Adds every file found in the specified user's project-list directory to the archive.
/// </summary>
/// <remarks>
/// Each entry is stored under the same relative path that is used to locate the file
/// in the file store.
/// </remarks>
/// <param name="archive">the archive to write to</param>
/// <param name="username">the username</param>
private static void ArchiveOldProjectLists(ZipArchive archive, string username)
{
    string listDirectory = string.Concat(Resources.ProjectDirectory, "/", username);

    foreach (FileInfo listFile in FileStore.GetFiles(listDirectory))
    {
        // The relative path doubles as the entry name inside the archive.
        string entryName = string.Concat(listDirectory, "/", listFile.Name);
        string sourceFile = FileStore.GetAbsolutePath(entryName);

        archive.CreateEntryFromFile(sourceFile, entryName);
    }
}
/// <summary>
/// Parses the code files of the projects in the code directory and writes the resulting
/// commands, scripts and procedures to their CSV files.
/// </summary>
/// <remarks>
/// Every sub-directory of the code directory is treated as one project whose directory
/// name is its numeric id. The date of a code file is taken from its file name with the
/// last five characters removed.
/// </remarks>
/// <param name="skip">the number of projects to skip before parsing</param>
/// <param name="limit">the number of projects to parse code for; values of zero or less mean no limit</param>
public void WriteCode(int skip, int limit)
{
    DirectoryInfo[] projects = FileStore.GetDirectories(Resources.CodeDirectory);
    int projectTotal = projects.Length;

    Logger.Log("Parsing code of " + projectTotal + " projects to CSV.");

    string[] commandColumns =
    {
        "scriptId", "projectId", "date", "depth", "scopeType", "scopeName", "command",
        "param1", "param2", "param3", "param4", "param5", "param6", "param7", "param8", "param9", "param10",
        "param11", "param12", "param13", "param14", "param15", "param16", "param17", "param18", "param19", "param20"
    };
    string[] scriptColumns = { "scriptId", "projectId", "date", "scopeType", "scopeName", "lineCount" };
    string[] procedureColumns = { "projectId", "date", "scopeType", "scopeName", "name", "argumentCount" };

    using (CsvWriter commandWriter = new CsvWriter(
        FileStore.GetAbsolutePath(Resources.CommandsCsv),
        commandColumns,
        int.Parse(Resources.CommandsCsvLinesPerFile)))
    using (CsvWriter scriptWriter = new CsvWriter(
        FileStore.GetAbsolutePath(Resources.ScriptsCsv),
        scriptColumns,
        int.Parse(Resources.ScriptsCsvLinesPerFile)))
    using (CsvWriter procedureWriter = new CsvWriter(
        FileStore.GetAbsolutePath(Resources.ProceduresCsv),
        procedureColumns,
        int.Parse(Resources.ProceduresCsvLinesPerFile)))
    {
        for (int index = 0; index < projects.Length; index++)
        {
            // One-based position of the project, used for skip/limit and progress output.
            int position = index + 1;

            if (position <= skip)
            {
                continue;
            }

            if (limit > 0 && position > skip + limit)
            {
                break;
            }

            DirectoryInfo project = projects[index];
            int projectId = int.Parse(project.Name);
            Logger.Log(LoggerHelper.FormatProgress("Parsing code of project " + projectId, position, projectTotal));

            foreach (FileInfo codeFile in project.GetFiles())
            {
                string source = File.ReadAllText(codeFile.FullName);

                // Drop the last five characters of the file name to obtain the date.
                string dateText = codeFile.Name.Remove(codeFile.Name.Length - 5);
                ParsedCode parsed = ParseCode(projectId, DateTime.Parse(dateText), source);

                WriteAllToCsv(commandWriter, parsed.Commands);
                WriteAllToCsv(scriptWriter, parsed.Scripts);
                WriteAllToCsv(procedureWriter, parsed.Procedures);
            }
        }
    }
}
/// <summary>
/// Writes all user data to CSV files.
/// </summary>
/// <remarks>
/// One row (id, username, joinDate, country) is written per file in the user directory.
/// The export stops at the first user file that is empty or cannot be parsed as JSON;
/// rows written before that point are kept.
/// </remarks>
public void WriteUsers()
{
    using (CsvWriter writer = new CsvWriter(
        FileStore.GetAbsolutePath(Resources.UsersCsv),
        new[] { "id", "username", "joinDate", "country" },
        int.Parse(Resources.UsersCsvLinesPerFile)))
    {
        FileInfo[] userFiles = FileStore.GetFiles(Resources.UserDirectory);
        int userTotal = userFiles.Length;
        int userCurrent = 0;

        Logger.Log("Parsing " + userTotal + " users to CSV.");

        // An empty first file is taken to mean that no user metadata is available at all.
        if (userFiles.Length > 0 && File.ReadAllText(userFiles[0].FullName).Length == 0)
        {
            Logger.Log("Missing metadata for users.");
            return;
        }

        foreach (FileInfo userFile in userFiles)
        {
            // The username is the file name with its last five characters removed.
            string username = userFile.Name.Remove(userFile.Name.Length - 5);

            userCurrent++;
            Logger.Log(LoggerHelper.FormatProgress(
                "Parsing user " + LoggerHelper.ForceLength(username, 10), userCurrent, userTotal));

            string contents = File.ReadAllText(userFile.FullName);
            if (contents.Length == 0)
            {
                Logger.Log("Missing metadata for user " + userFile.Name);
                return;
            }

            JObject user;
            try
            {
                // Parse the text that was just validated instead of reading the file a second time.
                user = JObject.Parse(contents);
            }
            catch (JsonReaderException e)
            {
                Logger.Log("The metadata for user `" + userFile.Name + "` could not be parsed.", e);
                return;
            }

            // Format with the invariant culture: in a custom format string ':' is the
            // culture's time separator and the year depends on the culture's calendar, so the
            // current culture could otherwise change the machine-readable timestamp.
            string joinDate = ((DateTime)user["history"]["joined"]).ToString(
                "yyyy-MM-ddTHH:mm:ss", System.Globalization.CultureInfo.InvariantCulture);

            writer
                .Write(int.Parse(user["id"].ToString()))
                .Write(user["username"].ToString())
                .Write(joinDate)
                .Write(user["profile"]["country"].ToString())
                .Newline();
        }
    }
}
/// <summary>
/// Creates an archive for the specified user containing all that user's data.
/// </summary>
/// <remarks>
/// Does nothing when an archive for the user already exists. If archiving fails after
/// the archive file has been created, the incomplete file is deleted before the
/// exception is rethrown, so that a later call does not mistake it for a finished archive.
/// </remarks>
/// <param name="username">a username</param>
private static void Archive(string username)
{
    FileStore.CreateDirectory(Resources.ArchiveDirectory);
    if (FileStore.FileExists(Resources.ArchiveDirectory, username + ".zip"))
    {
        return;
    }

    string archivePath = FileStore.GetAbsolutePath(Resources.ArchiveDirectory, username + ".zip");

    // Tracks whether this call created the file, so a failure to create it
    // (for example because it appeared in the meantime) never deletes someone else's file.
    bool created = false;
    try
    {
        using (var fileStream = new FileStream(archivePath, FileMode.CreateNew))
        {
            created = true;

            using (var archive = new ZipArchive(fileStream, ZipArchiveMode.Create, true))
            {
                ArchiveUserData(archive, username);
                ArchiveCurrentProjectList(archive, username);
                ArchiveOldProjectLists(archive, username);
                ArchiveProjectCode(archive, username);
            }
        }
    }
    catch
    {
        // Both using blocks have been disposed by now, so the file is closed and can be removed.
        if (created)
        {
            File.Delete(archivePath);
        }

        throw;
    }
}
/// <summary>
/// Deletes code files of projects whose modification date did not change between
/// consecutive project lists of the same user.
/// </summary>
/// <remarks>
/// For every user directory in the project directory, the project lists are processed
/// in order of file name. When a project appears with the same modification date as in
/// its previously processed occurrence, the code file named after the current project
/// list is deleted from that project's code directory, if it exists. Processing stops
/// entirely at the first project list or project entry that cannot be parsed.
/// </remarks>
public void PreparseCodeDuplicates()
{
    DirectoryInfo[] userDirs = FileStore.GetDirectories(Resources.ProjectDirectory);
    int userTotal = userDirs.Length;

    Logger.Log("Removing code duplicates of " + userTotal + " users.");

    for (int userIndex = 0; userIndex < userDirs.Length; userIndex++)
    {
        DirectoryInfo userDir = userDirs[userIndex];
        string username = userDir.Name;

        Logger.Log(LoggerHelper.FormatProgress(
            "Removing code duplicates of " + LoggerHelper.ForceLength(username, 10),
            userIndex + 1,
            userTotal));

        // Last seen modification date per project id, reset for every user.
        var lastModified = new Dictionary<int, DateTime>();

        foreach (FileInfo listFile in userDir.GetFiles().OrderBy(file => file.Name).ToArray())
        {
            JArray entries;
            try
            {
                entries = JArray.Parse(File.ReadAllText(listFile.FullName));
            }
            catch (JsonReaderException e)
            {
                Logger.Log("The project metadata list of user `" + userDir.Name + "` could not be parsed.", e);
                return;
            }

            foreach (JToken entry in entries)
            {
                JObject metadata = entry as JObject;
                if (metadata == null)
                {
                    Logger.Log("The metadata of a project of user `" + userDir.Name + "` could not be parsed.");
                    return;
                }

                int projectId = int.Parse(metadata["id"].ToString());
                DateTime modifyDate = DateTime.Parse(metadata["history"]["modified"].ToString());

                DateTime previousDate;
                bool unchanged = lastModified.TryGetValue(projectId, out previousDate)
                                 && previousDate.Equals(modifyDate);
                if (unchanged)
                {
                    string relativeCodePath = projectId + "/" + listFile.Name;
                    Logger.Log("Deleted duplicate code; " + relativeCodePath);

                    string codePath = FileStore.GetAbsolutePath(Resources.CodeDirectory, relativeCodePath);
                    if (File.Exists(codePath))
                    {
                        File.Delete(codePath);
                    }
                }

                lastModified[projectId] = modifyDate;
            }
        }
    }
}
/// <summary>
/// Writes all projects and their relations to authors to CSV files.
/// </summary>
/// <remarks>
/// One project row is written for every project in every project list of every user
/// directory. A remix relation (childId, parentId) is written only the first time a
/// project id is encountered, and only if the project has a remix parent. The export
/// stops at the first project list or project entry that cannot be parsed; rows written
/// before that point are kept.
/// </remarks>
public void WriteProjects()
{
    // Timestamps are machine-readable, so they are formatted with the invariant culture:
    // in a custom format string ':' is the culture's time separator and the year depends on
    // the culture's calendar.
    const string dateFormat = "yyyy-MM-ddTHH:mm:ss";
    System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

    using (CsvWriter projectRemixWriter = new CsvWriter(
        FileStore.GetAbsolutePath(Resources.ProjectRemixCsv),
        new[] { "childId", "parentId" },
        int.Parse(Resources.ProjectRemixCsvLinesPerFile)))
    using (CsvWriter projectWriter = new CsvWriter(
        FileStore.GetAbsolutePath(Resources.ProjectsCsv),
        new[]
        {
            "authorId", "date", "projectId", "title", "modifyDate", "createDate", "shareDate",
            "viewCount", "loveCount", "favoriteCount", "commentCount"
        },
        int.Parse(Resources.ProjectsCsvLinesPerFile)))
    {
        DirectoryInfo[] userDirs = FileStore.GetDirectories(Resources.ProjectDirectory);
        int userTotal = userDirs.Length;
        int userCurrent = 0;

        Logger.Log("Parsing metadata for " + userDirs.Length + " users to CSV.");

        // Ids of all projects seen so far, shared across users and project lists.
        ISet<int> projectHistory = new HashSet<int>();

        foreach (DirectoryInfo userDir in userDirs)
        {
            string username = userDir.Name;

            userCurrent++;
            Logger.Log(LoggerHelper.FormatProgress(
                "Parsing project lists of user " + LoggerHelper.ForceLength(username, 10),
                userCurrent,
                userTotal));

            foreach (FileInfo projectListFile in userDir.GetFiles())
            {
                JArray projectList;
                try
                {
                    projectList = JArray.Parse(File.ReadAllText(projectListFile.FullName));
                }
                catch (JsonReaderException e)
                {
                    Logger.Log("The project list for user `" + username + "` could not be parsed.", e);
                    return;
                }

                foreach (JToken projectFile in projectList)
                {
                    if (!(projectFile is JObject))
                    {
                        Logger.Log("A project of user `" + username + "` could not be parsed.");
                        return;
                    }

                    JObject project = (JObject)projectFile;
                    int authorId = int.Parse(project["author"]["id"].ToString());
                    int projectId = int.Parse(project["id"].ToString());
                    string remixParentId = project["remix"]["parent"].ToString();

                    // The date of the data is the list's file name with its last five characters removed.
                    string dataDate = projectListFile.Name.Substring(0, projectListFile.Name.Length - 5);

                    projectWriter
                        .Write(authorId)
                        .Write(dataDate)
                        .Write(projectId)
                        .Write(project["title"].ToString())
                        .Write(((DateTime)project["history"]["modified"]).ToString(dateFormat, invariant))
                        .Write(((DateTime)project["history"]["created"]).ToString(dateFormat, invariant))
                        .Write(((DateTime)project["history"]["shared"]).ToString(dateFormat, invariant))
                        .Write(int.Parse(project["stats"]["views"].ToString()))
                        .Write(int.Parse(project["stats"]["loves"].ToString()))
                        .Write(int.Parse(project["stats"]["favorites"].ToString()))
                        .Write(int.Parse(project["stats"]["comments"].ToString()))
                        .Newline();

                    // Add returns false when the id was already present, which replaces the
                    // separate Contains lookup.
                    bool firstOccurrence = projectHistory.Add(projectId);
                    if (firstOccurrence && remixParentId != "")
                    {
                        projectRemixWriter
                            .Write(projectId)
                            .Write(int.Parse(remixParentId))
                            .Newline();
                    }
                }
            }
        }
    }
}
/// <summary>
/// Adds the metadata file of the specified user to the archive.
/// </summary>
/// <remarks>
/// The entry is stored under the same relative path that is used to locate the file
/// in the file store.
/// </remarks>
/// <param name="archive">the archive to write to</param>
/// <param name="username">the username</param>
private static void ArchiveUserData(ZipArchive archive, string username)
{
    string entryName = string.Concat(Resources.UserDirectory, "/", username, ".json");
    string sourceFile = FileStore.GetAbsolutePath(entryName);

    archive.CreateEntryFromFile(sourceFile, entryName);
}
/// <summary>
/// Adds the project list file stored directly in the project directory for the
/// specified user to the archive.
/// </summary>
/// <remarks>
/// The entry is stored under the same relative path that is used to locate the file
/// in the file store.
/// </remarks>
/// <param name="archive">the archive to write to</param>
/// <param name="username">the username</param>
private static void ArchiveCurrentProjectList(ZipArchive archive, string username)
{
    string entryName = string.Concat(Resources.ProjectDirectory, "/", username, ".json");
    string sourceFile = FileStore.GetAbsolutePath(entryName);

    archive.CreateEntryFromFile(sourceFile, entryName);
}