/// <summary>
/// Unfinished R-GDL 1.0 rule generator: resolves a textual value for every feature
/// token but does not build any rule from it yet.
/// </summary>
/// <param name="cr">Clustering results; not read by the current implementation.</param>
/// <param name="features">Feature tokens, each passed to ReturnRecursiveFeatureValue.</param>
/// <returns>Always the empty string.</returns>
private static String Generate1_0Rules(ClasteringResults cr, ParserToken[] features)
{
    String[] featureTexts = new String[features.Length];
    int index = 0;
    while (index < featureTexts.Length)
    {
        // The resolved text is collected but never used to compose the result.
        featureTexts[index] = ReturnRecursiveFeatureValue(features[index]);
        index++;
    }

    String rules = "";
    return rules;
}
/// <summary>
/// Clusters the rows of <paramref name="rawData"/> with the Cluster routine and collects,
/// for every cluster, the mean and standard deviation of each attribute.
/// </summary>
/// <remarks>
/// Notes left in the original source hint at example input: attributes
/// { "Height", "Weight" }, 20 rows, and the arguments "raw, 3, 30"
/// (presumably rawData, 3 clusters, maxCount = 30 - confirm).
/// A commented-out console demo (printing the clustering, centroids and an outlier)
/// used to follow the return statement.
/// </remarks>
/// <param name="rawData">Observations, one row per sample; the length of row 0 is taken as the attribute count.</param>
/// <param name="numberOfClusters">Number of clusters handed to Cluster and used to size the statistics tables.</param>
/// <param name="maxCount">Forwarded to Cluster unchanged.</param>
/// <param name="randomSeed">Forwarded to Cluster unchanged.</param>
/// <returns>A result object carrying the clustering, the means and the standard deviations.</returns>
/// <exception cref="RGDLException">
/// Thrown when rawData is null, has no rows, or its first row has no columns.
/// </exception>
public static ClasteringResults Clasterize(double[][] rawData, int numberOfClusters, int maxCount, int randomSeed)
{
    // Short-circuit evaluation keeps the checks in the same order as three separate guards.
    bool nothingToCluster = rawData == null || rawData.Length < 1 || rawData[0].Length < 1;
    if (nothingToCluster)
    {
        throw new RGDLException("Empty structure to clasterize", new Exception());
    }

    int attributeCount = rawData[0].Length;

    int[] assignment = Cluster(rawData, numberOfClusters, attributeCount, maxCount, randomSeed);

    double[][] centroids = Allocate(numberOfClusters, attributeCount);
    UpdateMeans(rawData, assignment, centroids);

    // The deviations are computed against the means filled in just above.
    double[][] spreads = Allocate(numberOfClusters, attributeCount);
    UpdateStandardDeviations(rawData, assignment, centroids, spreads);

    ClasteringResults result = new ClasteringResults();
    result.clustering = assignment;
    result.means = centroids;
    result.standardDeviations = spreads;
    return result;
}
/// <summary>
/// Wraps every row of <paramref name="rawData"/> in a Point, lets GetClusters group the
/// points, and records each grouped point's ClusterId at that point's id in the result.
/// </summary>
/// <remarks>
/// Example values noted in the original source: eps = 60, minPts = 3.
/// This method assigns only the clustering array of the result. Rows whose point is not
/// part of any returned cluster keep the array default of 0.
/// </remarks>
/// <param name="rawData">Observations, one row per sample; the row index is passed to the Point constructor.</param>
/// <param name="eps">Forwarded to GetClusters unchanged.</param>
/// <param name="minPts">Forwarded to GetClusters unchanged.</param>
/// <returns>A result object whose clustering array has one entry per input row.</returns>
public static ClasteringResults Clasterize(double[][] rawData, double eps, int minPts)
{
    ClasteringResults result = new ClasteringResults();

    List<Point> points = new List<Point>();
    int row = 0;
    foreach (double[] observation in rawData)
    {
        points.Add(new Point(observation, row));
        row++;
    }

    result.clustering = new int[rawData.Length];

    foreach (List<Point> members in GetClusters(points, eps, minPts))
    {
        foreach (Point member in members)
        {
            result.clustering[member.id] = member.ClusterId;
        }
    }

    return result;
}
/// <summary>
/// Agglomerative clustering: starts with one cluster per row and repeatedly joins the two
/// clusters with the smallest UPGMA value until a single root cluster remains, then lets
/// GenerateClusters fill the per-row cluster assignment.
/// </summary>
/// <remarks>
/// The pairwise distance matrix is stored in the shared <c>distances</c> field.
/// NOTE(review): <paramref name="numberOfClusters"/> and <paramref name="maxDistance"/> are
/// not read anywhere in this method; GenerateClusters is always called with the literal 50.
/// Presumably that literal is a cut-off that maxDistance was meant to supply - confirm
/// before changing.
/// </remarks>
/// <param name="rawData">Observations, one row per sample; the length of row 0 sizes every copied record.</param>
/// <param name="numberOfClusters">Currently unused.</param>
/// <param name="maxDistance">Currently unused.</param>
/// <returns>A result object whose clustering array (one entry per row) was passed to GenerateClusters.</returns>
public static ClasteringResults Clasterize(double[][] rawData, int numberOfClusters, double maxDistance)
{
    ClasteringResults result = new ClasteringResults();

    // Full pairwise distance matrix, kept in the shared field.
    distances = new double[rawData.Length][];
    for (int i = 0; i < distances.Length; i++)
    {
        distances[i] = new double[rawData.Length];
        for (int j = 0; j < distances[i].Length; j++)
        {
            distances[i][j] = Distance(rawData[i], rawData[j]);
        }
    }

    result.clustering = new int[rawData.Length];

    // One singleton cluster per input row; ids start at 1.
    ArrayList activeClusters = new ArrayList();
    int nextClusterId = 1;
    for (int row = 0; row < rawData.Length; row++)
    {
        cluster leaf = new cluster();
        dataRecord record = new dataRecord();
        record.data = new double[rawData[0].Length];
        for (int col = 0; col < record.data.Length; col++)
        {
            record.data[col] = rawData[row][col];
        }
        record.index = row;
        leaf.children.Add(record);
        leaf.clusterId = nextClusterId;
        nextClusterId++;
        activeClusters.Add(leaf);
    }

    while (activeClusters.Count > 1)
    {
        cluster bestLeft = null;
        cluster bestRight = null;
        double bestDistance = double.MaxValue;

        // Examine every unordered pair once; the strict '<' keeps the first best pair on ties.
        for (int i = 0; i < activeClusters.Count; i++)
        {
            cluster left = (cluster)activeClusters[i];
            for (int j = i + 1; j < activeClusters.Count; j++)
            {
                cluster right = (cluster)activeClusters[j];
                double linkage = UPGMA(left, right);
                if (linkage < bestDistance)
                {
                    bestLeft = left;
                    bestRight = right;
                    bestDistance = linkage;
                }
            }
        }

        // The joined cluster remembers both parts and owns the union of their records.
        cluster merged = new cluster();
        merged.c1 = bestLeft;
        merged.c2 = bestRight;
        merged.distance = bestDistance;
        merged.children.AddRange(bestLeft.children);
        merged.children.AddRange(bestRight.children);
        merged.clusterId = nextClusterId;
        nextClusterId++;

        activeClusters.Remove(bestLeft);
        activeClusters.Remove(bestRight);
        activeClusters.Add(merged);
    }

    cluster root = (cluster)activeClusters[0];
    GenerateClusters(root, result.clustering, 50);
    return result;
}
/// <summary>
/// Evaluates all features on every recorded frame, clusters the resulting feature vectors
/// with k-means and renders the clusters as GDL script text in the requested dialect.
/// </summary>
/// <remarks>
/// A commented-out earlier signature in the original source took file names instead of
/// file content, and a commented-out File.WriteAllText call followed the return; the
/// generated text is returned to the caller instead.
/// N-gram sequence rules and the configuration footer are emitted for version "1.0" only.
/// </remarks>
/// <param name="recording">Frames; every element is cast to TSkeleton[] and its first skeleton is used.</param>
/// <param name="features">Feature tokens; each token's Conclusion is the key used to read the feature value.</param>
/// <param name="oldFileContent">Original GDL text. Version "1.0" scans it for FEATURE ... AS declarations; "1.1" and "1.2" prepend it to the output.</param>
/// <param name="clastersCount">Number of clusters requested from k-means; also the size of <paramref name="keyframes"/>.</param>
/// <param name="minimalTimeDistance">Forwarded to NGramAnalyser and reported (as seconds) in the 1.0 footer.</param>
/// <param name="maxIterationsCount">Forwarded to KMeansClustering.Clasterize.</param>
/// <param name="ruleName">Prefix of every generated rule conclusion.</param>
/// <param name="GDLVersion">"1.0", "1.1" or "1.2"; any other value yields an empty string.</param>
/// <param name="epsilon">Tolerance added to each standard deviation when testing cluster membership and in the generated rules.</param>
/// <param name="randomSeed">Forwarded to KMeansClustering.Clasterize.</param>
/// <param name="keyframes">Replaced by a new array; entry c is the index of the last frame found inside cluster c's tolerance box (0 when no frame matched).</param>
/// <returns>The generated GDL text, or "" for an unsupported version.</returns>
public static String GenerateRules(ArrayList recording, ParserToken[] features, String oldFileContent, int clastersCount, double minimalTimeDistance, int maxIterationsCount, String ruleName, String GDLVersion, double epsilon, int randomSeed, ref int[] keyframes)
{
    // Step 1: one feature vector per recorded frame.
    double[][] rawData = new double[recording.Count][];
    GDLInterpreter inter = new GDLInterpreter(features, null);
    long[] time = new long[recording.Count];
    for (int frame = 0; frame < rawData.Length; frame++)
    {
        rawData[frame] = new double[features.Length];
        TSkeleton ts = ((TSkeleton[])recording[frame])[0];
        if (frame > 0)
        {
            // time[0] is left at 0; later entries hold the frame's own TimePeriod.
            time[frame] = ts.TimePeriod;
        }
        Point3D[] bodyParts = HendlerHolder.GenerateBodyPartArray(ts, 0);
        // The returned conclusions are not needed; presumably this call fills inter.Heap,
        // which is read on the next line.
        inter.ReturnConclusions(bodyParts, 0);
        TrackingMemory tm = (TrackingMemory)inter.Heap[0];
        for (int f = 0; f < features.Length; f++)
        {
            rawData[frame][f] = (double)tm.Features[features[f].Conclusion];
        }
    }

    // Step 2: cluster the feature vectors.
    ClasteringResults cr = KMeansClustering.Clasterize(rawData, clastersCount, maxIterationsCount, randomSeed);

    // Step 3: label every frame with the last cluster whose box
    // (mean +/- (standard deviation + epsilon) on every feature) contains it; -1 if none.
    keyframes = new int[clastersCount];
    int[] sequence = new int[rawData.Length];
    for (int frame = 0; frame < rawData.Length; frame++)
    {
        int classIndex = -1;
        for (int c = 0; c < cr.means.Length; c++)
        {
            bool satisfied = true;
            for (int f = 0; f < rawData[frame].Length; f++)
            {
                if (Math.Abs(rawData[frame][f] - cr.means[c][f]) > cr.standardDeviations[c][f] + epsilon)
                {
                    satisfied = false;
                    break; // one feature outside the box is enough to reject the cluster
                }
            }
            if (satisfied)
            {
                classIndex = c;
                keyframes[classIndex] = frame;
            }
        }
        sequence[frame] = classIndex;
    }

    HowLong[] hla = GenerateSequenceFromRawData(sequence, time);

    // Step 4: render the requested dialect.
    if (GDLVersion == "1.0")
    {
        return BuildGdl10Document(cr, features, oldFileContent, clastersCount, minimalTimeDistance, ruleName, epsilon, hla, time);
    }
    if (GDLVersion == "1.1")
    {
        return BuildGdl11Document(cr, features, oldFileContent, ruleName, epsilon);
    }
    if (GDLVersion == "1.2")
    {
        return BuildGdl12Document(cr, features, oldFileContent, ruleName, epsilon);
    }
    return "";
}

// Cuts the expression text of the first 'count' FEATURE ... AS declarations out of a GDL script.
private static String[] ExtractFeatureExpressions(String oldFileContent, int count)
{
    String[] expressions = new String[count];
    int position = 0;
    for (int i = 0; i < expressions.Length; i++)
    {
        int startF = oldFileContent.IndexOf("FEATURE", position, StringComparison.OrdinalIgnoreCase);
        if (startF < 0)
        {
            // Same exception type the unguarded IndexOf call would raise, with a usable message.
            throw new ArgumentOutOfRangeException("oldFileContent", "FEATURE declaration number " + (i + 1) + " was not found in the GDL script.");
        }
        // The first case-insensitive "AS" after the keyword ends the expression, so an
        // expression that itself contains those two letters would be cut short.
        int endF = oldFileContent.IndexOf("AS", startF, StringComparison.OrdinalIgnoreCase);
        if (endF < 0)
        {
            throw new ArgumentOutOfRangeException("oldFileContent", "FEATURE declaration number " + (i + 1) + " has no AS keyword.");
        }
        // Skip the keyword and the single character that follows it.
        int startLength = startF + "FEATURE".Length + 1;
        expressions[i] = oldFileContent.Substring(startLength, endF - startLength);
        position = endF;
    }
    return expressions;
}

// Renders the n-gram analysis result as "RULE ... THEN <ruleName>_<n>GRAMS!" lines.
private static String BuildSequenceRules(double[][][] seqHelp, double[] probability, String ruleName, int clastersCount)
{
    System.Text.StringBuilder text = new System.Text.StringBuilder();
    for (int grams = 0; grams < seqHelp.Length; grams++)
    {
        if (grams > 0)
        {
            text.Append("\r\n");
        }
        double[][] gram = seqHelp[grams];
        if (gram == null)
        {
            continue;
        }
        text.Append("//Frequency of appearance: " + probability[grams].ToString(CultureInfo.InvariantCulture) + "\r\n");
        text.Append("RULE " + ruleName + "" + (int)gram[0][0]);
        if (gram.Length > 1)
        {
            text.Append(" & sequenceexists(\"");
            for (int step = 1; step < gram.Length; step++)
            {
                text.Append("[" + ruleName + "" + (int)gram[step][0] + "," + (gram[step][1] / 1000).ToString(CultureInfo.InvariantCulture) + "]");
            }
            // Close the call only when it was opened; a one-element n-gram previously
            // produced a stray "\")" and therefore a malformed rule.
            text.Append("\")");
        }
        text.Append(" THEN " + ruleName + "_" + (clastersCount - 1 + grams) + "GRAMS!");
    }
    return text.ToString();
}

// Declares the _EPS, _MEAN_<cluster> and _DEV_<cluster> constants shared by versions 1.1 and 1.2.
private static String BuildClusterStatisticFeatures(ClasteringResults cr, ParserToken[] features, double featureEps)
{
    System.Text.StringBuilder text = new System.Text.StringBuilder();
    for (int f = 0; f < features.Length; f++)
    {
        if (f > 0)
        {
            text.Append("\r\n");
        }
        text.Append("FEATURE " + featureEps.ToString(CultureInfo.InvariantCulture) + " AS " + features[f].Conclusion + "_EPS");
    }
    text.Append("\r\n");
    text.Append("\r\n");
    for (int c = 0; c < cr.means.Length; c++)
    {
        if (c > 0)
        {
            text.Append("\r\n");
        }
        for (int f = 0; f < cr.means[0].Length; f++)
        {
            text.Append("FEATURE " + cr.means[c][f].ToString(CultureInfo.InvariantCulture) + " AS " + features[f].Conclusion + "_MEAN_" + c + "\r\n");
            text.Append("FEATURE " + cr.standardDeviations[c][f].ToString(CultureInfo.InvariantCulture) + " AS " + features[f].Conclusion + "_DEV_" + c + "\r\n");
        }
    }
    return text.ToString();
}

// Builds the version "1.0" output: literal-valued rules, n-gram sequence rules and a configuration footer.
private static String BuildGdl10Document(ClasteringResults cr, ParserToken[] features, String oldFileContent, int clastersCount, double minimalTimeDistance, String ruleName, double featureEps, HowLong[] hla, long[] time)
{
    String[] featureExpressions = ExtractFeatureExpressions(oldFileContent, features.Length);

    System.Text.StringBuilder rules = new System.Text.StringBuilder();
    for (int c = 0; c < cr.means.Length; c++)
    {
        if (c > 0)
        {
            rules.Append("\r\n");
        }
        rules.Append("RULE ");
        for (int f = 0; f < cr.means[0].Length; f++)
        {
            if (f > 0)
            {
                rules.Append("\r\n\t& ");
            }
            rules.Append("abs(" + featureExpressions[f] + " -" + cr.means[c][f].ToString(CultureInfo.InvariantCulture) + ") <= " + cr.standardDeviations[c][f].ToString(CultureInfo.InvariantCulture) + " + " + featureEps.ToString(CultureInfo.InvariantCulture));
        }
        rules.Append(" THEN " + ruleName + "" + c);
    }

    double[] probability;
    double[][][] seqHelp = NGramAnalyser(hla, time, clastersCount, minimalTimeDistance, out probability);
    String sequenceRules = BuildSequenceRules(seqHelp, probability, ruleName, clastersCount);

    String configuration = "//Date and time of an analysis: " + DateTime.Now + "\r\n" + "//Done by: Tomasz Hachaj\r\n" + "//Clustering method: K-Means clustering\r\n" + "//Clusters count: " + clastersCount + "\r\n" + "//N-grams range: [" + (clastersCount - 1) + "," + (2 * clastersCount - 1) + "]\r\n" + "//Minimal time distance between rules in sequence: " + minimalTimeDistance.ToString(CultureInfo.InvariantCulture) + " seconds";

    // A randomly chosen entry of SentencjaDnia closes the file as the "citation of the day".
    Random random = new Random();
    String sentencjaDnia = SentencjaDnia[random.Next(SentencjaDnia.Length)];

    return "//-------------R-GDLv1.0 RULES---------------------------------------\r\n" + rules.ToString()
           + "\r\n\r\n"
           + "//-------------N-gram based analysis of sequences--------------------\r\n"
           + sequenceRules
           + "\r\n\r\n"
           + "//-------------R-GDLv1.0 configuration details----------------------\r\n"
           + configuration
           + "\r\n\r\n"
           + "//Citation of the day: " + sentencjaDnia;
}

// Builds the version "1.1" output: original script, named statistic constants and rules referring to them.
private static String BuildGdl11Document(ClasteringResults cr, ParserToken[] features, String oldFileContent, String ruleName, double featureEps)
{
    String newFeatures = BuildClusterStatisticFeatures(cr, features, featureEps);

    System.Text.StringBuilder rules = new System.Text.StringBuilder();
    for (int c = 0; c < cr.means.Length; c++)
    {
        if (c > 0)
        {
            rules.Append("\r\n");
        }
        rules.Append("RULE ");
        for (int f = 0; f < cr.means[0].Length; f++)
        {
            if (f > 0)
            {
                rules.Append("& ");
            }
            rules.Append("abs(" + features[f].Conclusion + " -" + features[f].Conclusion + "_MEAN_" + c + ") <= " + features[f].Conclusion + "_DEV_" + c + " + " + features[f].Conclusion + "_EPS ");
        }
        rules.Append("THEN " + ruleName + "" + c);
    }

    return "//-------------Original FEATURES-------------------------------------\r\n" + oldFileContent + "\r\n"
           + "//-------------R-GDLv1.0 FEATURES------------------------------------\r\n" + newFeatures + "\r\n"
           + "//-------------R-GDLv1.0 RULES---------------------------------------\r\n" + rules.ToString();
}

// Builds the version "1.2" output: as 1.1, plus normalised border features, per-cluster scores and rules on the borders.
private static String BuildGdl12Document(ClasteringResults cr, ParserToken[] features, String oldFileContent, String ruleName, double featureEps)
{
    String newFeatures = BuildClusterStatisticFeatures(cr, features, featureEps);

    // Distance from the cluster mean divided by (deviation + eps); a value <= 1 means "inside".
    System.Text.StringBuilder clusterBorders = new System.Text.StringBuilder();
    for (int c = 0; c < cr.means.Length; c++)
    {
        for (int f = 0; f < cr.means[0].Length; f++)
        {
            clusterBorders.Append("FEATURE " + "abs(" + features[f].Conclusion + " -" + features[f].Conclusion + "_MEAN_" + c + ") / (" + features[f].Conclusion + "_DEV_" + c + " + " + features[f].Conclusion + "_EPS ) AS " + features[f].Conclusion + "_RULE_" + c + "_FEATURE_" + f);
            clusterBorders.Append("\r\n");
        }
    }

    // Average of the border features of one cluster.
    System.Text.StringBuilder clusterScores = new System.Text.StringBuilder();
    for (int c = 0; c < cr.means.Length; c++)
    {
        clusterScores.Append("FEATURE (");
        for (int f = 0; f < cr.means[0].Length; f++)
        {
            if (f > 0)
            {
                clusterScores.Append(" + ");
            }
            clusterScores.Append(features[f].Conclusion + "_RULE_" + c + "_FEATURE_" + f);
        }
        clusterScores.Append(" ) / " + cr.means[0].Length + " AS " + ruleName + c + "_SCORE\r\n");
    }

    System.Text.StringBuilder rules = new System.Text.StringBuilder();
    for (int c = 0; c < cr.means.Length; c++)
    {
        if (c > 0)
        {
            rules.Append("\r\n");
        }
        rules.Append("RULE ");
        for (int f = 0; f < cr.means[0].Length; f++)
        {
            if (f > 0)
            {
                rules.Append(" & ");
            }
            rules.Append(features[f].Conclusion + "_RULE_" + c + "_FEATURE_" + f + " <= 1");
        }
        rules.Append(" THEN " + ruleName + "" + c);
    }

    return "//-------------ORIGINAL FEATURES--------------------------------------\r\n" + oldFileContent + "\r\n"
           + "//-------------R-GDLv1.0 FEATURES------------------------------------\r\n"
           + "//-------------CLUSTER ANALYSIS--------------------------------------\r\n" + newFeatures + "\r\n"
           + "//-------------CLUSTER BORDERS---------------------------------------\r\n"
           + clusterBorders.ToString() + "\r\n"
           + "//-------------FEATURE SCORES----------------------------------------\r\n"
           + clusterScores.ToString() + "\r\n"
           + "//-------------R-GDLv1.0 RULES---------------------------------------\r\n" + rules.ToString();
}