Esempio n. 1
0
        /// <summary>
        /// Prompts for a list of timeline files and then, for each hard-coded grade band
        /// (A, B, C, D, F), reloads the data, filters it to that band on "Assignment AVG",
        /// and writes two CSV files into the "TransitionsByDate" directory:
        /// aggregate_X.csv (one row per transition, one column per day key) and
        /// students_X.csv (the same layout, repeated for every user id).
        /// </summary>
        /// <param name="vm">View model used to append timelines, attach grades, filter by grade
        /// and compute the transitions. Its state is reloaded for every grade band.</param>
        private void OrderTransitionsByDate(TimelineAnalysisViewModel vm)
        {
            //step 1: get list of files to process
            List<string> filesToProcess = new List<string>();
            string fileName;

            Console.WriteLine("Enter files to process (-1 to stop)");
            while ((fileName = GetFile()).Length > 0)
            {
                filesToProcess.Add(fileName);
            }

            //without at least one file, filesToProcess[0] below would throw
            if (filesToProcess.Count == 0)
            {
                Console.WriteLine("No files to process.");
                return;
            }

            //step 2: setup grade-bands (e.g. A, B, C, etc.)  Hard coded for now as this is just for a
            //single class.  gradeRanges[i] is the lower bound of the band named gradeMap[i].
            double[] gradeRanges = { 90, 78, 69, 60, 0 };
            string[] gradeMap    = { "A", "B", "C", "D", "F" };

            //upper bound of the band currently being processed; lowered after every band
            double maxScore = 200;

            //this produces a lot of files, so create a separate directory for the output
            //(CreateDirectory does nothing when the directory already exists)
            string outputDirectory = "TransitionsByDate";
            Directory.CreateDirectory(outputDirectory);

            //based on currently existing code, it is easier to reopen the file for
            //each grade range
            for (int i = 0; i < gradeRanges.Length; i++)
            {
                double bound = gradeRanges[i];

                //reload the files
                LoadFile(filesToProcess[0]);
                for (int j = 1; j < filesToProcess.Count; j++)
                {
                    vm.AppendTimeline(filesToProcess[j]);
                }

                //get grade data
                vm.AttachGrades();

                //filter based on grade data
                vm.FilterByGrade("Assignment AVG", bound, maxScore);

                //get transitions for this grade level:
                //Item1 is indexed by day key then transition, Item2 by user id, day key, transition
                var result   = vm.OrderTransitionsByDate();
                var byDate   = result.Item1;
                var userData = result.Item2;

                //the next (lower) band ends just below this band's lower bound
                maxScore = bound - 0.01;

                //hold all day keys (sorted) for easier access
                int[] keys = byDate.Keys.OrderBy(k => k).ToArray();

                //figure out every distinct transition that occurs on any day
                HashSet<string> seenTransitions = new HashSet<string>();
                foreach (int key in keys)
                {
                    foreach (string transition in byDate[key].Keys)
                    {
                        seenTransitions.Add(transition);
                    }
                }
                string[] transitions = seenTransitions.ToArray();

                //write aggregate information to a file
                CsvWriter writer = new CsvWriter();

                //header row: label column followed by one column per day key
                writer.AddToCurrentLine("Transition");
                foreach (int key in keys)
                {
                    writer.AddToCurrentLine(key);
                }
                writer.CreateNewRow();

                //one data row per transition
                foreach (string transition in transitions)
                {
                    writer.AddToCurrentLine("T: " + transition);
                    foreach (int key in keys)
                    {
                        if (byDate[key].ContainsKey(transition))
                        {
                            //add in data for given transition
                            writer.AddToCurrentLine(byDate[key][transition].Count);
                        }
                        else
                        {
                            //no data, add a 0
                            writer.AddToCurrentLine(0);
                        }
                    }
                    writer.CreateNewRow();
                }

                //aggregate class results
                string aggregateFile = string.Format("aggregate_{0}.csv", gradeMap[i]);
                using (TextWriter tw = File.CreateText(Path.Combine(outputDirectory, aggregateFile)))
                {
                    tw.Write(writer.ToString());
                }
                Console.WriteLine("Created file {0}", aggregateFile);

                //write individual student information to file
                writer = new CsvWriter();

                //header row: user id, label column, then one column per day key
                writer.AddToCurrentLine("UserID");
                writer.AddToCurrentLine("Transition");
                foreach (int key in keys)
                {
                    writer.AddToCurrentLine(key);
                }
                writer.CreateNewRow();

                //one data row per (user, transition) pair
                foreach (int userId in userData.Keys)
                {
                    foreach (string transition in transitions)
                    {
                        //user id
                        writer.AddToCurrentLine(userId);

                        //data for given transition
                        writer.AddToCurrentLine("T: " + transition);
                        foreach (int key in keys)
                        {
                            //NOTE(review): assumes every user has an entry for every day key
                            //present in the aggregate data -- confirm against the view model
                            if (userData[userId][key].ContainsKey(transition))
                            {
                                //add in data for given transition
                                writer.AddToCurrentLine(userData[userId][key][transition].Count);
                            }
                            else
                            {
                                //no data, add a 0
                                writer.AddToCurrentLine(0);
                            }
                        }
                        writer.CreateNewRow();
                    }
                }

                string studentsFile = string.Format("students_{0}.csv", gradeMap[i]);
                using (TextWriter tw = File.CreateText(Path.Combine(outputDirectory, studentsFile)))
                {
                    tw.Write(writer.ToString());
                }
                Console.WriteLine("Created file {0}", studentsFile);
            }
        }
Esempio n. 2
0
        /// <summary>
        /// Similar to <see cref="BuildTransitionFrequencyCounts"/>, but works on all students and
        /// all loaded files at once.  Prompts for timeline files, collects the transition
        /// sequences of length 2 through 25 that occur at least 5 times, groups sequences
        /// using a disjoint set driven by Needleman-Wunsch alignment scores, and writes each
        /// sequence with its occurrence count (most frequent first) to "popular_sequences.csv".
        /// </summary>
        /// <param name="vm">View model that the timelines are loaded into and that supplies
        /// the transition combinations and state-number-to-string conversion.</param>
        private void AggregateTransitionFrequencyCounts(TimelineAnalysisViewModel vm)
        {
            //step 1: get list of files to process
            List<string> filesToProcess = new List<string>();
            string fileName;

            Console.WriteLine("Enter files to process (-1 to stop)");
            while ((fileName = GetFile()).Length > 0)
            {
                filesToProcess.Add(fileName);
            }

            //without at least one file, filesToProcess[0] below would throw
            if (filesToProcess.Count == 0)
            {
                Console.WriteLine("No files to process.");
                return;
            }

            //load all data into VM
            vm.LoadTimeline(filesToProcess[0]);
            for (int i = 1; i < filesToProcess.Count; i++)
            {
                vm.AppendTimeline(filesToProcess[i]);
            }

            //step2: get sequence boundaries.  Again, hard coded for now
            int startingSequenceLength = 2;
            int endingSequenceLength   = 25;

            //NOTE(review): this directory is created but nothing below writes into it;
            //popular_sequences.csv goes to the working directory -- confirm which is intended
            string outputDirectory = "AggregateTransitionFrequencyCounts";
            Directory.CreateDirectory(outputDirectory);

            /*
             * What I need to do:
             * Get all sequences.
             * For each sequence:
             *      Determine if similar to other known sequences.  If so, combine into same set. (disjoint set?)
             * */

            //sequence length -> (transition key such as "1_4_2" -> occurrence count)
            Dictionary<int, Dictionary<string, int>> allTransitions = new Dictionary<int, Dictionary<string, int>>();

            //begin file processing
            for (int sequenceLength = startingSequenceLength; sequenceLength <= endingSequenceLength; sequenceLength++)
            {
                //get grade data
                vm.AttachGrades();

                //build markov transitions
                vm.BuildDefaultMarkovStates();

                //figure out sequence distribution for entire data set and for individual students
                Dictionary<string, int> transitions = vm.GetAllTransitionCombinations(sequenceLength);

                //filter out rare sequences (fewer than 5 occurrences); the keys are copied
                //to a list first so the dictionary is not modified while it is enumerated
                List<string> rareKeys = transitions.Where(t => t.Value < 5).Select(t => t.Key).ToList();
                foreach (string key in rareKeys)
                {
                    transitions.Remove(key);
                }

                //save for future use
                allTransitions.Add(sequenceLength, transitions);

                Console.WriteLine("Loaded transitions of length {0}.", sequenceLength);
            }

            //use Needleman-Wunsch algorithm and disjoint sets to combine similar sequences
            DisjointSet<string> matches = new DisjointSet<string>();
            int matchCount = 0;

            //state numbers are shifted by this offset so that state 0 becomes 'a', 1 becomes 'b', ...
            const int startingNumber = (int)'a';

            //start with large sequences as it will make it more likely that these will be the "top" of the disjoint set
            for (int sequenceLength = endingSequenceLength; sequenceLength >= startingSequenceLength; sequenceLength--)
            {
                Console.WriteLine("Matching sequences of length {0}", sequenceLength);

                //Needleman-Wunsch works on single characters, so we need to transform Markov-like numbers to letters
                Dictionary<string, int> originalSequences = allTransitions[sequenceLength];
                Dictionary<string, int> modifiedSequences = new Dictionary<string, int>();
                foreach (KeyValuePair<string, int> kvp in originalSequences)
                {
                    //"1_4_2" -> { 'b', 'e', 'c' } -> "b_e_c"
                    char[] sequence = kvp.Key.Split('_')
                                      .Select(k => Convert.ToChar(Convert.ToInt32(k) + startingNumber))
                                      .ToArray();
                    string charSequence = string.Join("_", sequence);

                    //lastly, remember this sequence
                    modifiedSequences.Add(charSequence, kvp.Value);
                }

                //prime the disjoint set
                foreach (string key in modifiedSequences.Keys)
                {
                    matches.Find(key);
                }

                //having converted to character state representations, now run the Needleman-Wunsch algorithm
                List<string> sequences = modifiedSequences.Keys.ToList();
                for (int i = 0; i < sequences.Count; i++)
                {
                    for (int j = i + 1; j < sequences.Count; j++)
                    {
                        //current representatives of the two sequences
                        string first  = matches.Find(sequences[i]);
                        string second = matches.Find(sequences[j]);

                        //same representative: a union would change nothing, so skip the
                        //alignment and do not count it as a union
                        if (first == second)
                        {
                            continue;
                        }

                        //automatically count sequences as the same when one sequence is a complete substring of another sequence
                        //NOTE(review): both sequences come from the same length bucket and are distinct
                        //keys, so this test does not look like it can succeed here -- confirm whether a
                        //cross-length comparison was intended
                        string firstSequence  = sequences[i];
                        string secondSequence = sequences[j];
                        bool   sameSequence   = firstSequence.Replace(secondSequence, "").Length == 0 ||
                                                secondSequence.Replace(firstSequence, "").Length == 0;

                        if (sameSequence == false)
                        {
                            //Use NW to align the two representatives; a low score counts as "the same"
                            var result = NeedlemanWunsch.Align(first, second);
                            sameSequence = (double)NeedlemanWunsch.ScoreNpsmSequence(result.Item1, result.Item2) < 3;
                        }

                        if (sameSequence)
                        {
                            matches.UnionWith(first, second);
                            matchCount++;
                        }
                    }
                }
            }

            //now, get all sets and figure out popularity
            Console.WriteLine("{0} unions performed.", matchCount);
            List<List<string>>      allSets        = matches.AllSets();
            Dictionary<string, int> popularityDict = new Dictionary<string, int>();

            //NOTE(review): counts are accumulated per individual sequence key, not per set, so
            //the grouping above does not change what is written -- confirm whether counts were
            //meant to be summed per set
            Console.WriteLine("Calculating popularity of {0} sets...", allSets.Count);
            foreach (List<string> set in allSets)
            {
                foreach (string item in set)
                {
                    //convert back to Markov-style transitions ("b_e_c" -> "1_4_2")
                    int[]  pieces = item.Split('_').Select(c => Convert.ToChar(c)).Select(c => (int)c - startingNumber).ToArray();
                    string key    = string.Join("_", pieces);

                    //add in counts to the popularity dictionary (missing keys start at 0)
                    int runningTotal;
                    popularityDict.TryGetValue(key, out runningTotal);
                    popularityDict[key] = runningTotal + allTransitions[pieces.Length][key];
                }
            }

            //write this information to a file
            CsvWriter writer = new CsvWriter();

            //aggregate class results
            Console.WriteLine("Writing most popular sequences to file.");
            foreach (KeyValuePair<string, int> kvp in popularityDict.OrderByDescending(p => p.Value))
            {
                //translate the numeric states into their string form for the report
                int[]  pieces  = kvp.Key.Split('_').Select(c => Convert.ToInt32(c)).ToArray();
                string npsmKey = string.Join("_", pieces.Select(p => vm.StateNumberToNpsmString(p)).ToArray());

                writer.AddToCurrentLine(npsmKey);
                writer.AddToCurrentLine(kvp.Value.ToString());
                writer.CreateNewRow();
            }
            using (TextWriter tw = File.CreateText("popular_sequences.csv"))
            {
                tw.Write(writer.ToString());
            }
        }