// Step 1: extract the data from the execution traces.
/// <summary>
/// Builds an <see cref="MLDataset"/> from a list of execution traces.
/// For every node id seen in any trace, one value column with one slot per
/// trace is created; the slot of a trace holds the value recorded for that
/// node in that trace (the last one, if the node was recorded repeatedly).
/// Slots of traces in which the node never appears keep their default (null).
/// </summary>
/// <param name="individual">Not used by this method.</param>
/// <param name="traces">The execution traces, one per dataset row.</param>
/// <returns>The populated dataset; its Count equals the number of traces.</returns>
public static MLDataset FromExecutionTraces(Individual /* unused */ individual, IList<ExecutionTrace> traces)
{
    MLDataset result = new MLDataset();
    int traceCount = traces.Count;
    result.Count = traceCount;

    for (int traceIndex = 0; traceIndex < traceCount; traceIndex++)
    {
        var currentTrace = traces[traceIndex];
        IList<ulong> operatorOrder = new List<ulong>(currentTrace.States.Count);

        foreach (var state in currentTrace.States)
        {
            var nodeId = state.NodeId;

            // Look up the value column of this node; register the node on first sight.
            object[] column;
            bool alreadyKnown = result._features.TryGetValue(nodeId, out column);
            if (!alreadyKnown)
            {
                column = new object[traceCount];
                result._features.Add(nodeId, column);
                result._featureInfo.Add(nodeId, new MLDatasetFeature(nodeId, state.OperatorId, state.Value.GetType()));
            }

            column[traceIndex] = state.Value;

            // Only operators (no variables, constants) go into the order list,
            // because constant values get dropped anyway.
            if (state.OperatorId > 0)
            {
                operatorOrder.Add(nodeId);
            }
        }

        result._featureOrder.Add(operatorOrder);
    }

    return result;
}
/// <summary>
/// Produces the expected output column for a test suite: one integer per
/// test case, obtained by passing the test case's Result through
/// <c>MLDataset.ToDatasetValue</c>.
/// </summary>
/// <param name="testSuite">The test suite whose expected results are converted.</param>
/// <returns>
/// An array with one entry per test case, in test-case order. When the
/// conversion yields no value, the entry is the default produced by
/// <c>GetValueOrDefault()</c>.
/// </returns>
private int[] GetExpectedOutputDataset(TestSuite testSuite)
{
    var testCases = testSuite.TestCases;
    var expected = new int[testCases.Count];

    for (int index = 0; index < expected.Length; index++)
    {
        var converted = MLDataset.ToDatasetValue(testCases[index].Result);
        expected[index] = converted.GetValueOrDefault();
    }

    return expected;
}
/// <summary>
/// Writes the features of a dataset to the log at level 4 as a single line:
/// the prefix "Features: ", then each feature's string form followed by a
/// comma, then a line break. Does nothing when the dataset is null.
/// </summary>
/// <param name="dataset">The dataset whose features are logged; may be null.</param>
private void LogDatasetFeatures(MLDataset dataset)
{
    if (dataset == null)
    {
        return;
    }

    Logger.Write(4, "Features: ");
    foreach (var entry in dataset.Features)
    {
        Logger.Write(4, entry.ToString() + ",");
    }

    // Terminate the line started above.
    Logger.WriteLine(4, "");
}
/// <summary>
/// Creates a fitness result with the given fitness value and, optionally,
/// the dataset it was computed from.
/// </summary>
/// <param name="fitness">The fitness value, forwarded to the base constructor.</param>
/// <param name="dataset">The dataset stored in <c>Dataset</c>; defaults to null.</param>
public MDLFitnessResult(double fitness, MLDataset dataset = null)
    : base(fitness)
{
    this.Dataset = dataset;
}
/// <summary>
/// Calculates the fitness of an individual as the product of its standard
/// fitness and an MDL factor obtained from a decision tree that is built on
/// the recorded execution traces. If no features, no input rows or no
/// decision tree are available, the result carries the standard fitness only.
/// </summary>
/// <param name="individual">The individual being evaluated.</param>
/// <param name="testSuite">The test suite supplying the expected outputs.</param>
/// <param name="results">The results of the individual; its length is used as the sample count.</param>
/// <returns>
/// An <c>MDLFitnessResult</c> holding the dataset and, when a decision tree
/// could be built, the combined fitness, classification error, tree size,
/// standard fitness and number of used attributes.
/// </returns>
public FitnessResult CalculateFitness(Individual individual, TestSuite testSuite, object[] results)
{
    // Standard fitness f0.
    double standardFitness = StandardFitnessCalculator.CalculateFitness(individual, testSuite, results).Fitness;

    MLDataset dataset = MLDataset.FromExecutionTraces(individual, Singleton<ExecutionRecord>.Instance.Traces);
    LogDatasetFeatures(dataset);
    var fitnessResult = new MDLFitnessResult(standardFitness, dataset);

    bool hasFeatures = dataset.Features.Count() > 0;
    if (!hasFeatures)
    {
        if (standardFitness == 0)
        {
            Logger.WriteLine(1, "Standard fitness: 0");
        }

        return fitnessResult;
    }

    // Alternative input variants that are currently not in use:
    //   Variant 1 (results of all nodes):
    //     dataset.ToRawInputDataset()
    //   Variant 2 (results and operation types of the first n and last n
    //   operations in chronological order):
    //     dataset.ToRawFirstNLastNInputDataset(5, 5)
    // Variant 3 (used here): only consider values of bool and int
    // expressions and operations.
    int startContinuousFeatures;
    var input = MLDataset.ConvertTracesToTypedSteps(individual, Singleton<ExecutionRecord>.Instance.Traces, Steps, true, out startContinuousFeatures);

    bool hasInput = input.Length > 0 && input[0].Length > 0;
    if (!hasInput)
    {
        return fitnessResult;
    }

    var expected = GetExpectedOutputDataset(testSuite);
    LogDataset(input, expected);

    var decisionTree = CreateDecisionTree(input, expected, startContinuousFeatures);
    if (decisionTree == null)
    {
        return fitnessResult;
    }

    int error = GetClassificationError(decisionTree, input, expected);
    int treeSize = GetTreeSize(decisionTree);
    double mdlFitness = CalculateMDLFitness(error, treeSize, results.Length);
    double combinedFitness = standardFitness * mdlFitness;

    var rules = decisionTree.ToRules();
    LogResult(combinedFitness, error, treeSize, mdlFitness, rules);
    int usedAttributes = GetUsedAttributes(decisionTree).Count;

    fitnessResult.Fitness = combinedFitness;
    fitnessResult.ClassificationError = error;
    fitnessResult.TreeSize = treeSize;
    fitnessResult.StandardFitness = standardFitness;
    fitnessResult.UsedAttributes = usedAttributes;

    // Log the MDL result details when the combined fitness is exactly zero.
    if (combinedFitness == 0)
    {
        Logger.WriteLine(1, "Std fitness: " + standardFitness);

        var predicted = decisionTree.Decide(input);
        var loss = Math.Round(new ZeroOneLoss(expected).Loss(predicted) * expected.Length);
        var classificationErrorFactor = ((double)error + 1) / (results.Length + 1);
        Logger.WriteLine(1, "Program error: " + error + "; loss: " + loss + "; factor: " + classificationErrorFactor);

        var treeSizeFactor = Math.Log(treeSize + 1, 2);
        Logger.WriteLine(1, "Tree size: " + treeSize + "; factor: " + treeSizeFactor);
        Logger.WriteLine(1, "MDL fitness: " + mdlFitness);
    }

    return fitnessResult;
}