public void GetSocialRoles(CommentMetricsViewModel vm)
{
    Console.Write("Enter starting date (YYYY-MM-DD): ");
    string startDateString = Console.ReadLine();
    Console.Write("Enter ending date (YYYY-MM-DD): ");
    string endDateString = Console.ReadLine();
    DateTime startDate = DateTime.Parse(startDateString);
    DateTime endDate = DateTime.Parse(endDateString);
    Dictionary<int, int> roles = vm.CalculateSocialRole(startDate, endDate);

    CsvWriter csv = new CsvWriter();
    csv.AddToCurrentLine("UserId");
    csv.AddToCurrentLine("SocialRole");
    csv.CreateNewRow();
    foreach (var kvp in roles)
    {
        csv.AddToCurrentLine(kvp.Key.ToString());
        csv.AddToCurrentLine(kvp.Value.ToString());
        csv.CreateNewRow();
    }

    Console.Write("Enter destination file: ");
    string outputFileName = Console.ReadLine();
    using (TextWriter tw = File.CreateText(outputFileName))
    {
        tw.Write(csv.ToString());
    }
    Console.WriteLine("CSV file written.");
}
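// The methods in this file lean on a small CsvWriter helper (AddToCurrentLine, CreateNewRow,
// ToString) whose implementation is not shown here. The class below is only a minimal sketch of
// what such a helper might look like, assuming System.Collections.Generic, System.Linq, and
// System.Text are imported at the top of the file; the project's real CsvWriter may differ.
// Fields are quoted RFC 4180-style because some columns (e.g. comment Content) can contain
// commas, quotes, or newlines.
public class CsvWriter
{
    private readonly List<List<string>> _rows = new List<List<string>> { new List<string>() };

    //accepts any value; ints, doubles, etc. are converted via ToString()
    public void AddToCurrentLine(object value)
    {
        _rows[_rows.Count - 1].Add(value == null ? "" : value.ToString());
    }

    //starts a new (initially empty) row
    public void CreateNewRow()
    {
        _rows.Add(new List<string>());
    }

    //renders all rows as comma-separated lines, quoting fields that need it
    public override string ToString()
    {
        StringBuilder sb = new StringBuilder();
        foreach (List<string> row in _rows)
        {
            sb.AppendLine(string.Join(",", row.Select(Quote)));
        }
        return sb.ToString();
    }

    private static string Quote(string field)
    {
        if (field.Contains(",") || field.Contains("\"") || field.Contains("\n"))
        {
            return "\"" + field.Replace("\"", "\"\"") + "\"";
        }
        return field;
    }
}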
/// <summary>
/// Writes the loaded timeline information to a CSV. The format of the CSV file
/// matches that expected by ParseTimeline() so the file can be read back in later.
/// </summary>
/// <param name="fileName"></param>
public void WriteLoadedDataToCsv(string fileName)
{
    CsvWriter writer = new CsvWriter();
    foreach (var user in Timeline.Values)
    {
        //first column is the user id
        writer.AddToCurrentLine(user.OsbideId);

        //all other columns are state transitions
        foreach (TimelineState state in user.RawStates)
        {
            string stateAsString = "";
            if (state.IsSocialEvent)
            {
                stateAsString = string.Format("{0};{1}", state.State, state.StartTime);
            }
            else
            {
                stateAsString = string.Format("{0};{1};{2}", state.State, state.StartTime, state.EndTime);
            }
            writer.AddToCurrentLine(stateAsString);
        }
        writer.CreateNewRow();
    }

    //write to file
    using (TextWriter tw = File.CreateText(fileName))
    {
        tw.Write(writer.ToString());
    }
}
public void WriteToCsv(List<List<string>> matrix, string fileName)
{
    //write results to a CsvWriter
    CsvWriter writer = new CsvWriter();
    foreach (List<string> line in matrix)
    {
        foreach (string cell in line)
        {
            writer.AddToCurrentLine(cell);
        }
        writer.CreateNewRow();
    }

    //write to file
    using (TextWriter tw = File.CreateText(fileName))
    {
        tw.Write(writer.ToString());
    }
}
public void WriteToCsv(string fileName)
{
    List<int> keys = _threadedComments.Keys.ToList();
    keys.Sort();
    CsvWriter csvWriter = new CsvWriter();

    //write header
    csvWriter.AddToCurrentLine("Post ID");
    csvWriter.AddToCurrentLine("Date");
    csvWriter.AddToCurrentLine("Author");
    csvWriter.AddToCurrentLine("AuthorRole");
    csvWriter.AddToCurrentLine("Content");
    csvWriter.AddToCurrentLine("Reading Ease");
    csvWriter.AddToCurrentLine("Reading Grade Level");
    csvWriter.AddToCurrentLine("# Helpful Marks");
    csvWriter.AddToCurrentLine("Time to first reply (minutes)");
    csvWriter.CreateNewRow();

    //write entries
    int commentCounter = 1;
    foreach (int key in keys)
    {
        Comment rootComment = _threadedComments[key];

        //calculate time to first reply (-1 when there are no replies)
        double numMinutes = -1;
        if (rootComment.ChildComments.Count > 0)
        {
            numMinutes = (rootComment.ChildComments[0].DateReceived - rootComment.DateReceived).TotalMinutes;
            numMinutes = Math.Round(numMinutes, 2);
        }

        //write root, then all child comments
        csvWriter.AddToCurrentLine(commentCounter.ToString());
        csvWriter.AddToCurrentLine(rootComment.DateReceived.ToString("yyyy-MM-dd HH:mm:ss"));
        csvWriter.AddToCurrentLine(rootComment.UserId.ToString());
        csvWriter.AddToCurrentLine(rootComment.User.Role.ToString());
        csvWriter.AddToCurrentLine(rootComment.Content);
        csvWriter.AddToCurrentLine(rootComment.FleschReadingEase.ToString());
        csvWriter.AddToCurrentLine(rootComment.FleschKincaidGradeLevel.ToString());
        csvWriter.AddToCurrentLine(rootComment.HelpfulMarks);
        csvWriter.AddToCurrentLine(numMinutes);
        csvWriter.CreateNewRow();

        //now do all children
        int childCounter = 1;
        foreach (Comment child in rootComment.ChildComments)
        {
            csvWriter.AddToCurrentLine(string.Format("{0}.{1}", commentCounter, childCounter));
            csvWriter.AddToCurrentLine(child.DateReceived.ToString("yyyy-MM-dd HH:mm:ss"));
            csvWriter.AddToCurrentLine(child.UserId.ToString());
            csvWriter.AddToCurrentLine(child.User.Role.ToString());
            csvWriter.AddToCurrentLine(child.Content);
            csvWriter.AddToCurrentLine(child.FleschReadingEase.ToString());
            csvWriter.AddToCurrentLine(child.FleschKincaidGradeLevel.ToString());
            csvWriter.AddToCurrentLine(child.HelpfulMarks);
            csvWriter.AddToCurrentLine(-1);
            csvWriter.CreateNewRow();
            childCounter++;
        }
        commentCounter++;
    }

    using (TextWriter tw = File.CreateText(fileName))
    {
        tw.Write(csvWriter.ToString());
    }
}
/// <summary>
/// Writes time-in-state information to a CSV file
/// </summary>
/// <param name="fileName"></param>
public void WriteTimeInStateToCsv(string fileName)
{
    //write results to a CsvWriter
    CsvWriter writer = new CsvWriter();

    //add header row
    writer.AddToCurrentLine("User ID");
    List<string> states = GetAllStates();
    foreach (string state in states)
    {
        writer.AddToCurrentLine(state);
    }

    //add grades if loaded
    List<string> grades = GetAllGrades();
    foreach (string grade in grades)
    {
        writer.AddToCurrentLine(grade);
    }
    writer.CreateNewRow();

    //add data rows
    foreach (var item in Timeline.Values)
    {
        writer.AddToCurrentLine(item.OsbideId.ToString());

        //add state information
        foreach (string state in states)
        {
            //normalized states use the "NormalizedTimeInState" property;
            //non-normalized states use the "TimeInState" property (reported in seconds)
            if (item.GetAggregateState(state).NormalizedTimeInState > 0)
            {
                writer.AddToCurrentLine(item.GetAggregateState(state).NormalizedTimeInState.ToString("0.0000000"));
            }
            else
            {
                writer.AddToCurrentLine(item.GetAggregateState(state).TimeInState.TotalSeconds.ToString());
            }
        }

        //add grade information
        foreach (string grade in grades)
        {
            if (item.Grades.ContainsKey(grade) == true)
            {
                writer.AddToCurrentLine(item.Grades[grade].ToString());
            }
            else
            {
                writer.AddToCurrentLine("0");
            }
        }
        writer.CreateNewRow();
    }

    //write to file
    using (TextWriter tw = File.CreateText(fileName))
    {
        tw.Write(writer.ToString());
    }
}
/// <summary>
/// Writes transition counts to a CSV file
/// </summary>
/// <param name="fileName"></param>
public void WriteTransitionsToCsv(string fileName)
{
    //write results to a CsvWriter
    CsvWriter writer = new CsvWriter();

    //add header row
    writer.AddToCurrentLine("User ID");
    var query = from item in GetAllTransitions()
                select new
                {
                    Key = item.Key,
                    Value = item.Value,
                    AsString = string.Format("{0};{1}", item.Key, item.Value),
                    Kvp = item
                };
    var transitions = query.OrderBy(q => q.AsString).ToList();
    foreach (var transition in transitions)
    {
        writer.AddToCurrentLine(transition.AsString);
    }

    //add grades if loaded
    List<string> grades = GetAllGrades();
    foreach (string grade in grades)
    {
        writer.AddToCurrentLine(grade);
    }
    writer.CreateNewRow();

    //add data rows
    foreach (var item in Timeline.Values)
    {
        writer.AddToCurrentLine(item.OsbideId.ToString());

        //add transition counts
        foreach (var transition in transitions)
        {
            if (item.Transitions.ContainsKey(transition.Kvp) == true)
            {
                writer.AddToCurrentLine(item.Transitions[transition.Kvp].ToString());
            }
            else
            {
                writer.AddToCurrentLine("0");
            }
        }

        //add grade information
        foreach (string grade in grades)
        {
            if (item.Grades.ContainsKey(grade) == true)
            {
                writer.AddToCurrentLine(item.Grades[grade].ToString());
            }
            else
            {
                writer.AddToCurrentLine("0");
            }
        }
        writer.CreateNewRow();
    }

    //write to file
    using (TextWriter tw = File.CreateText(fileName))
    {
        tw.Write(writer.ToString());
    }
}
public void Run()
{
    int userChoice = 0;
    ActionRequestLogViewModel vm = new ActionRequestLogViewModel();
    while (userChoice != (int)MenuOption.Exit)
    {
        Console.WriteLine((int)MenuOption.LoadLogsByDate + ". Reload access logs BY DATE from DB (costly)");
        Console.WriteLine((int)MenuOption.LoadRawLogsFromDb + ". Reload RAW access logs from DB (costly)");
        Console.WriteLine((int)MenuOption.LoadRawLogsFromCache + ". Load RAW access logs from cache");
        Console.WriteLine((int)MenuOption.DetailsViewCounts + ". Get details view statistics");
        Console.WriteLine((int)MenuOption.AggregateByWeek + ". Aggregate daily activity by week");
        Console.WriteLine((int)MenuOption.GenerateWeeklyStatistics + ". Generate summary weekly statistics by controller and action");
        Console.WriteLine((int)MenuOption.CountProfileViews + ". Count profile views");
        Console.WriteLine((int)MenuOption.Exit + ". Exit");
        Console.Write(">> ");
        string rawInput = Console.ReadLine();
        if (Int32.TryParse(rawInput, out userChoice) == false)
        {
            Console.WriteLine("Invalid input.");
        }
        else
        {
            MenuOption selection = (MenuOption)userChoice;
            switch (selection)
            {
                case MenuOption.LoadLogsByDate:
                    vm.LoadLogsByDate();
                    Console.WriteLine("access logs parsed...");
                    break;
                case MenuOption.LoadRawLogsFromDb:
                    vm.LoadRawLogsFromDb();
                    Console.WriteLine("raw logs loaded...");
                    break;
                case MenuOption.LoadRawLogsFromCache:
                    vm.LoadRawLogsFromCache();
                    break;
                case MenuOption.DetailsViewCounts:
                    var viewCounts = vm.GenerateDetailsViewStatistics();
                    foreach (var kvp in viewCounts)
                    {
                        //count views per unique student
                        Dictionary<int, int> students = new Dictionary<int, int>();
                        foreach (var actionLog in kvp.Value)
                        {
                            if (students.ContainsKey(actionLog.CreatorId) == false)
                            {
                                //start at zero so the increment below counts the first view exactly once
                                students.Add(actionLog.CreatorId, 0);
                            }
                            students[actionLog.CreatorId] += 1;
                        }
                        CsvWriter writer = new CsvWriter();
                        writer.AddToCurrentLine(kvp.Key);
                        writer.CreateNewRow();
                        foreach (var student in students)
                        {
                            writer.AddToCurrentLine(student.Value);
                            writer.CreateNewRow();
                        }
                        using (TextWriter tw = File.CreateText(kvp.Key + ".csv"))
                        {
                            tw.Write(writer.ToString());
                        }
                        Console.WriteLine("{0}: {1} (num students: {2})", kvp.Key, kvp.Value.Count, students.Keys.Count);
                    }
                    break;
                case MenuOption.AggregateByWeek:
                    List<List<string>> spreadsheet = vm.AggregateLogsByWeek();
                    Console.Write("Enter destination file: ");
                    string fileName = Console.ReadLine();
                    vm.WriteToCsv(spreadsheet, fileName);
                    Console.WriteLine("daily activity aggregated...");
                    break;
                case MenuOption.GenerateWeeklyStatistics:
                    //aggregate weekly statistics
                    vm.AggregateLogsByWeek();

                    //generate one CSV file for each controller / action pairing
                    foreach (string controllerName in vm.ControllerActions.Keys)
                    {
                        foreach (string actionName in vm.ControllerActions[controllerName].Keys)
                        {
                            List<List<string>> matrix = vm.FilterActivity(controllerName, actionName);
                            string name = string.Format("weekly_{0}_{1}.csv", controllerName, actionName);
                            vm.WriteToCsv(matrix, name);
                        }
                    }
                    break;
                case MenuOption.CountProfileViews:
                    var result = vm.CountProfileVisits();
                    foreach (var kvp in result)
                    {
                        Console.WriteLine("{0}: {1}", kvp.Key, kvp.Value);
                    }
                    break;
                case MenuOption.Exit:
                    Console.WriteLine("Returning to main menu.");
                    break;
                default:
                    break;
            }
        }
        Console.WriteLine("");
    }
}
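// Run() references a MenuOption enum that is not defined in this file. The sketch below is an
// assumed layout: the member names come from the code above, but the underlying numeric values
// are a guess, included only so the menu wiring is easier to follow.
public enum MenuOption
{
    LoadLogsByDate = 1,
    LoadRawLogsFromDb = 2,
    LoadRawLogsFromCache = 3,
    DetailsViewCounts = 4,
    AggregateByWeek = 5,
    GenerateWeeklyStatistics = 6,
    CountProfileViews = 7,
    Exit = 8
}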
/// <summary>
/// Creates a matrix of transition activity by date for each grade band and for each student
/// </summary>
/// <param name="vm"></param>
private void OrderTransitionsByDate(TimelineAnalysisViewModel vm)
{
    //step 1: get list of files to process
    List<string> filesToProcess = new List<string>();
    string fileName = "a";
    Console.WriteLine("Enter files to process (-1 to stop)");
    while ((fileName = GetFile()).Length > 0)
    {
        filesToProcess.Add(fileName);
    }

    //step 2: set up grade bands (e.g. A, B, C, etc.). Hard coded for now as this is just for a single class
    double maxScore = 200;
    double[] gradeRanges = { 90, 78, 69, 60, 0 };
    string[] gradeMap = { "A", "B", "C", "D", "F" };

    //this produces a lot of files, so create a separate directory for the output
    string outputDirectory = "TransitionsByDate";
    if (Directory.Exists(outputDirectory) == false)
    {
        Directory.CreateDirectory(outputDirectory);
    }

    //finally, begin processing
    //reset max score for A students
    maxScore = 200;

    //based on currently existing code, it is easier to reopen the files for each grade range
    for (int i = 0; i < gradeRanges.Length; i++)
    {
        double bound = gradeRanges[i];

        //reload the files
        LoadFile(filesToProcess[0]);
        for (int j = 1; j < filesToProcess.Count; j++)
        {
            vm.AppendTimeline(filesToProcess[j]);
        }

        //get grade data
        vm.AttachGrades();

        //filter based on grade data
        vm.FilterByGrade("Assignment AVG", bound, maxScore);

        //get transitions for this grade level
        var result = vm.OrderTransitionsByDate();
        var byDate = result.Item1;

        //update scores for the next grade boundary
        maxScore = bound - 0.01;

        //hold all day keys for easier access
        int[] keys = byDate.Keys.OrderBy(k => k).ToArray();

        //figure out all transitions
        Dictionary<string, string> transitionsDict = new Dictionary<string, string>();
        foreach (int key in keys)
        {
            foreach (string transition in byDate[key].Keys)
            {
                transitionsDict[transition] = transition;
            }
        }
        string[] transitions = transitionsDict.Keys.ToArray();

        //write aggregate information to a file
        CsvWriter writer = new CsvWriter();

        //label column for the transition names
        writer.AddToCurrentLine("Transition");

        //add in header row
        foreach (int key in keys)
        {
            writer.AddToCurrentLine(key);
        }
        writer.CreateNewRow();

        //add in data
        foreach (string transition in transitions)
        {
            //data for given transition
            writer.AddToCurrentLine("T: " + transition);
            foreach (int key in keys)
            {
                if (byDate[key].ContainsKey(transition))
                {
                    //add in data for given transition
                    writer.AddToCurrentLine(byDate[key][transition].Count);
                }
                else
                {
                    //no data, add a 0
                    writer.AddToCurrentLine(0);
                }
            }
            writer.CreateNewRow();
        }

        //aggregate class results
        using (TextWriter tw = File.CreateText(string.Format("{0}/aggregate_{1}.csv", outputDirectory, gradeMap[i])))
        {
            tw.Write(writer.ToString());
            Console.WriteLine("Created file aggregate_{0}.csv", gradeMap[i]);
        }

        //write individual student information to file
        writer = new CsvWriter();
        writer.AddToCurrentLine("UserID");

        //label column for the transition names
        writer.AddToCurrentLine("Transition");

        //add in header row
        foreach (int key in keys)
        {
            writer.AddToCurrentLine(key);
        }
        writer.CreateNewRow();

        //add in data
        var userData = result.Item2;
        foreach (int userId in userData.Keys)
        {
            foreach (string transition in transitions)
            {
                //user id
                writer.AddToCurrentLine(userId);

                //data for given transition
                writer.AddToCurrentLine("T: " + transition);
                foreach (int key in keys)
                {
                    if (userData[userId][key].ContainsKey(transition))
                    {
                        //add in data for given transition
                        writer.AddToCurrentLine(userData[userId][key][transition].Count);
                    }
                    else
                    {
                        //no data, add a 0
                        writer.AddToCurrentLine(0);
                    }
                }
                writer.CreateNewRow();
            }
        }
        using (TextWriter tw = File.CreateText(string.Format("{0}/students_{1}.csv", outputDirectory, gradeMap[i])))
        {
            tw.Write(writer.ToString());
            Console.WriteLine("Created file students_{0}.csv", gradeMap[i]);
        }
    }
}
/// <summary>
/// Similar to <see cref="BuildTransitionFrequencyCounts"/>, but instead creates a single file
/// per transition length for all students and all loaded files.
/// </summary>
/// <param name="vm"></param>
private void AggregateTransitionFrequencyCounts(TimelineAnalysisViewModel vm)
{
    //step 1: get list of files to process
    List<string> filesToProcess = new List<string>();
    string fileName = "a";
    Console.WriteLine("Enter files to process (-1 to stop)");
    while ((fileName = GetFile()).Length > 0)
    {
        filesToProcess.Add(fileName);
    }

    //load all data into the VM
    vm.LoadTimeline(filesToProcess[0]);
    for (int i = 1; i < filesToProcess.Count; i++)
    {
        vm.AppendTimeline(filesToProcess[i]);
    }

    //step 2: get sequence boundaries. Again, hard coded for now
    int startingSequenceLength = 2;
    int endingSequenceLength = 25;

    //this produces a lot of files, so create a separate directory for the output
    string outputDirectory = "AggregateTransitionFrequencyCounts";
    if (Directory.Exists(outputDirectory) == false)
    {
        Directory.CreateDirectory(outputDirectory);
    }

    /*
     * What I need to do:
     * Get all sequences.
     * For each sequence:
     *    Determine if similar to other known sequences. If so, combine into same set. (disjoint set?)
     */
    Dictionary<int, Dictionary<string, int>> allTransitions = new Dictionary<int, Dictionary<string, int>>();

    //begin file processing
    for (int sequenceLength = startingSequenceLength; sequenceLength <= endingSequenceLength; sequenceLength++)
    {
        //get grade data
        vm.AttachGrades();

        //build markov transitions
        vm.BuildDefaultMarkovStates();

        //figure out sequence distribution for the entire data set and for individual students
        Dictionary<string, int> transitions = vm.GetAllTransitionCombinations(sequenceLength);

        //filter out rarely occurring sequences (fewer than 5 occurrences)
        var smallKeys = transitions.Where(t => t.Value < 5).Select(t => t.Key).ToList();
        foreach (string key in smallKeys)
        {
            transitions.Remove(key);
        }

        //save for future use
        allTransitions.Add(sequenceLength, transitions);
        Console.WriteLine("Loaded transitions of length {0}.", sequenceLength);
    }

    //use the Needleman-Wunsch algorithm and disjoint sets to combine similar sequences
    DisjointSet<string> matches = new DisjointSet<string>();
    int matchCount = 0;

    //start with long sequences, as this makes it more likely that they will become the "top" of the disjoint set
    int startingNumber = (int)'a';
    for (int sequenceLength = endingSequenceLength; sequenceLength >= startingSequenceLength; sequenceLength--)
    {
        Console.WriteLine("Matching sequences of length {0}", sequenceLength);

        //Needleman-Wunsch works on single characters, so we need to transform Markov-like numbers to letters
        Dictionary<string, int> originalSequences = allTransitions[sequenceLength];
        Dictionary<string, int> modifiedSequences = new Dictionary<string, int>();
        foreach (var kvp in originalSequences)
        {
            //convert into numbers
            int[] pieces = kvp.Key.Split('_').Select(k => Convert.ToInt32(k) + startingNumber).ToArray();

            //then, convert back to characters
            char[] sequence = pieces.Select(p => Convert.ToChar(p)).ToArray();

            //and finally into a string
            string charSequence = string.Join("_", sequence);

            //lastly, remember this sequence
            modifiedSequences.Add(charSequence, kvp.Value);
        }

        //prime the disjoint set
        foreach (string key in modifiedSequences.Keys)
        {
            matches.Find(key);
        }

        //having converted to character state representations, now run the Needleman-Wunsch algorithm
        List<string> sequences = modifiedSequences.Keys.ToList();
        for (int i = 0; i < sequences.Count; i++)
        {
            for (int j = i + 1; j < sequences.Count; j++)
            {
                string first = matches.Find(sequences[i]);
                string second = matches.Find(sequences[j]);

                //automatically count sequences as the same when one sequence is a complete substring of the other
                string firstSequence = sequences[i];
                string secondSequence = sequences[j];
                if (firstSequence.Replace(secondSequence, "").Length == 0
                    || secondSequence.Replace(firstSequence, "").Length == 0)
                {
                    matches.UnionWith(first, second);
                    matchCount++;
                }
                else
                {
                    //use Needleman-Wunsch to check for alignment
                    //align the two sequences
                    var result = NeedlemanWunsch.Align(first, second);

                    //if the score is similar, then count the sequences as the same (union)
                    if ((double)NeedlemanWunsch.ScoreNpsmSequence(result.Item1, result.Item2) < 3)
                    {
                        matches.UnionWith(first, second);
                        matchCount++;
                    }
                }
            }
        }
    }

    //now, get all sets and figure out the popularity of each set
    Console.WriteLine("{0} unions performed.", matchCount);
    List<List<string>> allSets = matches.AllSets();
    List<List<string>> smallerSets = allSets.Where(s => s.Count > 1).ToList();
    Dictionary<string, int> popularityDict = new Dictionary<string, int>();
    Console.WriteLine("Calculating popularity of {0} sets...", allSets.Count);
    foreach (List<string> set in allSets)
    {
        foreach (string item in set)
        {
            //convert back to Markov-style transitions
            int[] pieces = item.Split('_').Select(c => Convert.ToChar(c)).Select(c => (int)c - startingNumber).ToArray();
            string key = string.Join("_", pieces);
            if (popularityDict.ContainsKey(key) == false)
            {
                popularityDict.Add(key, 0);
            }

            //add in counts to the popularity dictionary
            popularityDict[key] += allTransitions[pieces.Length][key];
        }
    }

    //write this information to a file
    CsvWriter writer = new CsvWriter();

    //aggregate class results
    Console.WriteLine("Writing most popular sequences to file.");
    foreach (KeyValuePair<string, int> kvp in popularityDict.OrderByDescending(p => p.Value))
    {
        int[] pieces = kvp.Key.Split('_').Select(c => Convert.ToInt32(c)).ToArray();
        string npsmKey = string.Join("_", pieces.Select(p => vm.StateNumberToNpsmString(p)).ToArray());
        writer.AddToCurrentLine(npsmKey);
        writer.AddToCurrentLine(kvp.Value.ToString());
        writer.CreateNewRow();
    }
    using (TextWriter tw = File.CreateText("popular_sequences.csv"))
    {
        tw.Write(writer.ToString());
    }
}
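// AggregateTransitionFrequencyCounts relies on a DisjointSet<T> type that is not part of this
// file. The class below is only a minimal, assumed union-find sketch exposing the three
// operations used above (Find, UnionWith, AllSets); the project's real implementation may
// differ. It assumes System.Collections.Generic and System.Linq are imported.
public class DisjointSet<T>
{
    private readonly Dictionary<T, T> _parent = new Dictionary<T, T>();

    //returns the representative of the item's set, adding the item as its own set if unseen;
    //path compression keeps later lookups fast
    public T Find(T item)
    {
        if (_parent.ContainsKey(item) == false)
        {
            _parent[item] = item;
            return item;
        }
        if (_parent[item].Equals(item))
        {
            return item;
        }
        T root = Find(_parent[item]);
        _parent[item] = root;
        return root;
    }

    //merges the sets containing the two items
    public void UnionWith(T first, T second)
    {
        T firstRoot = Find(first);
        T secondRoot = Find(second);
        if (firstRoot.Equals(secondRoot) == false)
        {
            _parent[secondRoot] = firstRoot;
        }
    }

    //groups every known item by its representative and returns the resulting sets
    public List<List<T>> AllSets()
    {
        //snapshot the keys first so path compression inside Find() does not disturb enumeration
        List<T> items = _parent.Keys.ToList();
        return items.GroupBy(item => Find(item))
                    .Select(group => group.ToList())
                    .ToList();
    }
}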
/// <summary>
/// Locates common transition sequence cycles
/// </summary>
/// <param name="vm"></param>
private void LocateCommonTransitionCycles(TimelineAnalysisViewModel vm)
{
    //write results to a file
    vm.NormalizeProgrammingStates();
    vm.AttachGrades();
    CsvWriter writer = new CsvWriter();

    //build header
    writer.AddToCurrentLine("UserId");
    writer.AddToCurrentLine("TotalTimeProgramming");
    writer.AddToCurrentLine("NumberOfCycles");
    foreach (string[] sequence in vm.InterestingSequences)
    {
        string sequenceHeader = String.Join("_", sequence);
        writer.AddToCurrentLine("Time_" + sequenceHeader);
        writer.AddToCurrentLine("NormalizedTime_" + sequenceHeader);
        writer.AddToCurrentLine("Count_" + sequenceHeader);
        writer.AddToCurrentLine("NormalizedCount_" + sequenceHeader);
    }
    writer.AddToCurrentLine("CycleTime");
    writer.AddToCurrentLine("PercentTimeAccountedFor");

    //add grades if loaded
    List<string> grades = vm.GetAllGrades();
    foreach (string grade in grades)
    {
        writer.AddToCurrentLine(grade);
    }
    writer.CreateNewRow();

    //write data cells
    foreach (int userId in vm.StatesByUser.Keys)
    {
        writer.AddToCurrentLine(userId);

        //get total time spent programming
        StudentTimeline userTimeline = vm.Timeline[userId];
        TimelineState timeState = userTimeline.GetAggregateState("normalized_total_time");
        TimeSpan totalTime = timeState.EndTime - timeState.StartTime;
        writer.AddToCurrentLine(totalTime.TotalMinutes);
        TimeSpan cycleTime = new TimeSpan();

        //and total number of cycles
        int totalCycles = 0;
        foreach (string[] sequence in vm.InterestingSequences)
        {
            string sequenceKey = String.Join("_", sequence);
            totalCycles += vm.StatesByUser[userId][sequenceKey].Count;
        }
        writer.AddToCurrentLine(totalCycles);

        //write total time spent in cycle
        foreach (string[] sequence in vm.InterestingSequences)
        {
            string sequenceKey = String.Join("_", sequence);
            TimeSpan sequenceTime = new TimeSpan();
            foreach (PatternParserResult pattern in vm.StatesByUser[userId][sequenceKey])
            {
                foreach (TimelineState state in pattern.StateSequence)
                {
                    if (state.EndTime < state.StartTime)
                    {
                        throw new Exception("EndTime must be larger than StartTime");
                    }
                    sequenceTime += state.EndTime - state.StartTime;
                }
            }
            cycleTime += sequenceTime;

            //total time
            writer.AddToCurrentLine(sequenceTime.TotalMinutes);

            //normalized time
            writer.AddToCurrentLine(Math.Round((sequenceTime.TotalMinutes / totalTime.TotalMinutes) * 100, 2));

            //count
            writer.AddToCurrentLine(vm.StatesByUser[userId][sequenceKey].Count);

            //normalized count
            double normalizedCount = ((vm.StatesByUser[userId][sequenceKey].Count) / (double)totalCycles) * 100;
            writer.AddToCurrentLine(Math.Round(normalizedCount, 2));
        }
        writer.AddToCurrentLine(cycleTime.TotalMinutes);
        writer.AddToCurrentLine(Math.Round((cycleTime.TotalMinutes / totalTime.TotalMinutes) * 100, 2));

        //add grade information
        foreach (string grade in grades)
        {
            if (userTimeline.Grades.ContainsKey(grade) == true)
            {
                writer.AddToCurrentLine(userTimeline.Grades[grade].ToString());
            }
            else
            {
                writer.AddToCurrentLine("0");
            }
        }
        writer.CreateNewRow();
    }
    using (TextWriter tw = File.CreateText("sequence_cycles.csv"))
    {
        tw.Write(writer.ToString());
    }
    Console.WriteLine("Finished locating sequences.");
}
/*
 * What I want to do:
 * For each assignment:
 *    figure out common sequences of length m to n
 *    For each student, for each grade band (A-F), again determine frequencies of length m to n
 *    Build a frequency distribution for each grade band by sequence
 */
private void BuildTransitionFrequencyCounts(TimelineAnalysisViewModel vm)
{
    //step 1: get list of files to process
    List<string> filesToProcess = new List<string>();
    string fileName = "a";
    Console.WriteLine("Enter files to process (-1 to stop)");
    while ((fileName = GetFile()).Length > 0)
    {
        filesToProcess.Add(fileName);
    }

    //step 2: set up grade bands (e.g. A, B, C, etc.). Hard coded for now as this is just for a single class
    double maxScore = 200;
    double[] gradeRanges = { 90, 78, 69, 60, 0 };
    string[] gradeMap = { "A", "B", "C", "D", "F" };

    //step 3: get sequence boundaries. Again, hard coded for now
    int startingSequenceLength = 2;
    int endingSequenceLength = 25;

    //step 4: get assignments.
    string[] assignments = { "Assignment #1", "Assignment #2", "Assignment #3", "Assignment #4", "Assignment #5", "Assignment #6", "Assignment #7" };
    int assignmentCounter = 0;

    //this produces a lot of files, so create a separate directory for the output
    string outputDirectory = "TransitionFrequencyCounts";
    if (Directory.Exists(outputDirectory) == false)
    {
        Directory.CreateDirectory(outputDirectory);
    }

    //finally, begin processing
    foreach (string fileToProcess in filesToProcess)
    {
        string folderName = fileToProcess.Replace("#", "");
        string outputPath = Path.Combine(outputDirectory, folderName);
        if (Directory.Exists(outputPath) == false)
        {
            Directory.CreateDirectory(outputPath);
        }
        for (int sequenceLength = startingSequenceLength; sequenceLength <= endingSequenceLength; sequenceLength++)
        {
            //reset max score for A students
            maxScore = 200;

            //based on currently existing code, it is easier to reopen the file for each grade range
            for (int i = 0; i < gradeRanges.Length; i++)
            {
                double bound = gradeRanges[i];

                //reload the file
                LoadFile(fileToProcess);

                //get grade data
                vm.AttachGrades();

                //filter based on grade data
                vm.FilterByGrade(assignments[assignmentCounter], bound, maxScore);

                //update scores for the next grade boundary
                maxScore = bound - 0.01;

                //build markov transitions
                vm.BuildDefaultMarkovStates();

                //figure out sequence distribution for the entire data set and for individual students
                Dictionary<string, int> transitions = vm.GetAllTransitionCombinations(sequenceLength);

                //interesting transitions are those with more than 5 occurrences
                var interestingTransitions = transitions.Where(t => t.Value > 5).OrderBy(t => t.Value).ToList();

                //write this information to a file
                CsvWriter writer = new CsvWriter();

                //aggregate class results
                Console.WriteLine("Processing transition sequences of length {0}...", sequenceLength);
                foreach (KeyValuePair<string, int> kvp in interestingTransitions)
                {
                    writer.AddToCurrentLine(kvp.Key);
                    writer.AddToCurrentLine(kvp.Value.ToString());
                    writer.CreateNewRow();
                }
                using (TextWriter tw = File.CreateText(string.Format("{0}/aggregate_{1}_{2}.csv", outputPath, sequenceLength, gradeMap[i])))
                {
                    tw.Write(writer.ToString());
                }

                //individual students
                //add header data
                writer = new CsvWriter();
                writer.AddToCurrentLine("UserId");
                writer.AddToCurrentLine("Grade");
                foreach (var kvp in interestingTransitions)
                {
                    writer.AddToCurrentLine(kvp.Key);
                }
                writer.CreateNewRow();
                foreach (var user in vm.Timeline.Values)
                {
                    //first row for each user contains raw values
                    writer.AddToCurrentLine(user.OsbideId);
                    writer.AddToCurrentLine(gradeMap[i]);

                    //only use the interesting states as columns, as identified in the aggregate analysis
                    foreach (KeyValuePair<string, int> kvp in interestingTransitions)
                    {
                        if (user.TransitionCounts.ContainsKey(kvp.Key) == true)
                        {
                            writer.AddToCurrentLine(user.TransitionCounts[kvp.Key]);
                        }
                        else
                        {
                            writer.AddToCurrentLine("0");
                        }
                    }
                    writer.CreateNewRow();

                    //second row contains normalized values
                    writer.AddToCurrentLine(user.OsbideId);
                    writer.AddToCurrentLine(gradeMap[i]);
                    int totalTransitions = user.TransitionCounts.Values.Sum();

                    //only use the interesting states as columns, as identified in the aggregate analysis
                    foreach (KeyValuePair<string, int> kvp in interestingTransitions)
                    {
                        if (user.TransitionCounts.ContainsKey(kvp.Key) == true)
                        {
                            writer.AddToCurrentLine(user.TransitionCounts[kvp.Key] / (double)totalTransitions);
                        }
                        else
                        {
                            writer.AddToCurrentLine("0");
                        }
                    }
                    writer.CreateNewRow();
                }
                using (TextWriter tw = File.CreateText(string.Format("{0}/individual_{1}_{2}.csv", outputPath, sequenceLength, gradeMap[i])))
                {
                    tw.Write(writer.ToString());
                }
            }
        }

        //move to the next assignment
        assignmentCounter++;
    }
}
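// GetAllTransitionCombinations(sequenceLength) belongs to TimelineAnalysisViewModel and is not
// shown in this file. The helper below is only an illustrative sketch of how counting
// fixed-length transition sequences could work, under the assumption that each user's Markov
// states are available as an ordered list of state numbers and that sequences are keyed as
// underscore-joined numbers (matching the "_"-split keys used in
// AggregateTransitionFrequencyCounts). The method and parameter names here are hypothetical.
private Dictionary<string, int> CountTransitionCombinations(
    Dictionary<int, List<int>> markovStatesByUser, int sequenceLength)
{
    Dictionary<string, int> counts = new Dictionary<string, int>();
    foreach (List<int> states in markovStatesByUser.Values)
    {
        //slide a window of the requested length over the user's state sequence
        for (int start = 0; start + sequenceLength <= states.Count; start++)
        {
            string key = string.Join("_", states.Skip(start).Take(sequenceLength));
            if (counts.ContainsKey(key) == false)
            {
                counts.Add(key, 0);
            }
            counts[key] += 1;
        }
    }
    return counts;
}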