/// <summary>
/// Clusters answer sheets with bottom-up hierarchical clustering using the
/// group average method.
/// The pairwise distance between two answer sheets depends on <paramref name="feature"/>:
/// for <c>ClassificationFeature.Proposed</c> it is the <c>Distance</c> returned by
/// <c>CalcAnswerProcessSimilarity</c> on the grouped answer steps of both sheets;
/// for <c>ClassificationFeature.AnswerTime</c> it is the absolute difference of
/// their <c>AnswerTime</c> values. Any other feature value leaves all distances at zero.
/// The distances are then standardized (mean subtracted, divided by the population
/// standard deviation) before clustering.
/// </summary>
/// <param name="answers">Answer sheets to cluster.</param>
/// <param name="feature">Feature used to measure the distance between two answer sheets.</param>
/// <returns>
/// The root of the dendrogram together with the standardized, symmetric distance table
/// (indexed by position in <paramref name="answers"/>, zero on the diagonal).
/// <c>RootTree</c> is <c>null</c> when <paramref name="answers"/> is empty.
/// </returns>
private HierarchicalClusteringResult ClusteringAnswerSheets(List<AnswerSheet> answers, ClassificationFeature feature)
{
    Stopwatch sw = new Stopwatch();
    Console.WriteLine("Start answer process grouping: " + feature.ToString());
    sw.Start();

    int count = answers.Count;

    // Cache of answer step grouping results (only needed by the proposed method).
    List<List<AnswerStep>> answerSteps = new List<List<AnswerStep>>(count);
    if (feature == ClassificationFeature.Proposed)
    {
        Console.WriteLine("Answer Step Grouping...");
        foreach (AnswerSheet answer in answers)
        {
            answerSteps.Add(GroupAnswerStep(answer.Strokes));
        }
        Console.WriteLine("Finished: " + sw.Elapsed);
    }

    // Pairwise distance table. It is filled with raw distances first and
    // standardized in place afterwards; the diagonal stays at 0.
    Console.WriteLine("Creating answer process similarity table...");
    double[,] stdDistanceTable = new double[count, count];
    double sum = 0.0;
    double sumOfSquares = 0.0;
    int pairCount = 0;
    for (int i = 0; i < count; i++)
    {
        for (int k = i + 1; k < count; k++)
        {
            double distance = 0.0;
            switch (feature)
            {
                case ClassificationFeature.Proposed:
                    // proposed method
                    distance = CalcAnswerProcessSimilarity(answerSteps[i], answerSteps[k]).Distance;
                    break;
                case ClassificationFeature.AnswerTime:
                    // answering time (an earlier variant compared WritingTime instead)
                    distance = Math.Abs((long)(answers[i].AnswerTime - answers[k].AnswerTime));
                    break;
            }

            stdDistanceTable[i, k] = distance;
            stdDistanceTable[k, i] = distance;
            sum += distance;
            sumOfSquares += distance * distance;
            pairCount++;
        }
    }

    // Population mean and standard deviation over all unordered pairs:
    // Var[x] = E[x^2] - (E[x])^2. The sum of squares must be divided by the
    // pair count before the squared mean is subtracted.
    double mean = 0.0;
    double stdDev = 0.0;
    if (pairCount > 0)
    {
        mean = sum / pairCount;
        double variance = sumOfSquares / pairCount - mean * mean;
        // Rounding can push a tiny variance slightly below zero; clamp so Sqrt stays real.
        stdDev = Math.Sqrt(Math.Max(variance, 0.0));
    }

    // When every distance is identical (always the case for exactly two answer
    // sheets) the deviation is 0; dividing by it would fill the table with NaN.
    // Dividing by 1 instead leaves all standardized distances at 0.
    double scale = stdDev > 0.0 ? stdDev : 1.0;

    // Standardization of the distance table: subtract the average and divide by
    // the standard deviation, giving average 0 and variance 1.
    for (int i = 0; i < count; i++)
    {
        for (int k = i + 1; k < count; k++)
        {
            double standardized = (stdDistanceTable[i, k] - mean) / scale;
            stdDistanceTable[i, k] = standardized;
            stdDistanceTable[k, i] = standardized;
        }
    }
    Console.WriteLine("Finished: " + sw.Elapsed);

    // Initial condition -> 1 element in 1 cluster
    // bottom up hierarchical clustering
    Console.WriteLine("Clustering answersheets (Group Average Method)...");
    List<AnswerDendrogramTreeNode> treeElements = new List<AnswerDendrogramTreeNode>(count);
    foreach (AnswerSheet answer in answers)
    {
        AnswerDendrogramTreeNode leaf = new AnswerDendrogramTreeNode();
        leaf.AnswerData = answer;
        treeElements.Add(leaf);
    }

    // "> 1" instead of "!= 1" so that an empty input does not enter the loop.
    while (treeElements.Count > 1)
    {
        // Join the pair of clusters with the minimum group-average distance.
        // The pair defaults to (0, 1): if no distance compares below
        // double.MaxValue (e.g. NaN distances), two distinct clusters are still
        // merged, so the list shrinks by one on every pass and the loop ends.
        int minPair1 = 0;
        int minPair2 = 1;
        double minDistance = double.MaxValue;
        for (int i = 0, ilen = treeElements.Count; i < ilen; i++)
        {
            for (int k = i + 1; k < ilen; k++)
            {
                // calculate group average
                List<AnswerSheet> cluster1 = treeElements[i].GetClusterAnswerSheets();
                List<AnswerSheet> cluster2 = treeElements[k].GetClusterAnswerSheets();
                double d = CalcAnswersGroupAverage(cluster1, cluster2, stdDistanceTable);
                if (d < minDistance)
                {
                    minPair1 = i;
                    minPair2 = k;
                    minDistance = d;
                }
            }
        }

        // join nodes: the merged node is appended, then both children are removed
        AnswerDendrogramTreeNode child1 = treeElements[minPair1];
        AnswerDendrogramTreeNode child2 = treeElements[minPair2];
        AnswerDendrogramTreeNode[] children = new AnswerDendrogramTreeNode[] { child1, child2 };
        AnswerDendrogramTreeNode node = new AnswerDendrogramTreeNode(children, minDistance, stdDistanceTable);
        treeElements.Add(node);
        treeElements.Remove(child1);
        treeElements.Remove(child2);
    }

    // The single remaining element is the dendrogram root (none for empty input).
    AnswerDendrogramTreeNode treeParent = treeElements.Count > 0 ? treeElements[0] : null;

    sw.Stop();
    Console.WriteLine("Finished: " + sw.Elapsed);

    HierarchicalClusteringResult result = new HierarchicalClusteringResult();
    result.RootTree = treeParent;
    result.AnswerSheetDistances = stdDistanceTable;
    return result;
}
/// <summary>
/// Records the group name and clusters the given answer sheets twice, once per
/// classification feature, storing both clustering results on this instance.
/// </summary>
/// <param name="answers">List of answer sheets</param>
/// <param name="name">Name of answer group</param>
public void GroupAnswerSheet(List<AnswerSheet> answers, string name)
{
    // The parameter shadows the field, so the qualifier is required here.
    this.name = name;

    // Clustering based on the proposed answer-process feature.
    hierarchicalResult = ClusteringAnswerSheets(answers, ClassificationFeature.Proposed);

    // Clustering based on the answering-time feature.
    hierarchicalResultAnswerTime = ClusteringAnswerSheets(answers, ClassificationFeature.AnswerTime);
}