/// <summary>
/// Produces successively shortened versions of <paramref name="rule"/> and returns the
/// one selected by the inaccuracy threshold.
/// </summary>
/// <remarks>
/// <paramref name="rule"/> is modified in place: its <c>decisionValue</c> is reassigned and
/// <c>DeleteFirstCondition</c> is called until no conditions remain. Callers that need the
/// original rule afterwards must pass a copy.
/// </remarks>
/// <param name="rule">Rule to shorten; mutated by this method.</param>
/// <param name="inaccurancy">Largest relative inaccuracy a candidate may have to be selected.</param>
/// <param name="inaccuracyOfTrainingTable">Uncertainty of the training set, used as the divisor of every ratio.</param>
/// <returns>The selected snapshot of the rule; the first (unshortened) snapshot when no other candidate qualifies.</returns>
/// <exception cref="DivideByZeroException"><paramref name="inaccuracyOfTrainingTable"/> is zero.</exception>
private Rule GetSubruleOfInacurrany(Rule rule, double inaccurancy, int inaccuracyOfTrainingTable)
{
    // Snapshot the rule before every DeleteFirstCondition call. The loop body always runs
    // at least once, so the list is never empty.
    List<Rule> subrulesList = new List<Rule>();
    do
    {
        rule.decisionValue = GetMostCommonDecision(GetSubsetOfRule(rule, dataSet.trainingSet));
        subrulesList.Add(rule.Copy());
        rule.DeleteFirstCondition();
    }
    while (rule.conditions.Count > 0);

    // Floating-point division by zero would silently yield NaN/Infinity; keep failing
    // loudly, as the integer division did.
    if (inaccuracyOfTrainingTable == 0)
    {
        throw new DivideByZeroException("The inaccuracy of the training table must not be zero.");
    }

    List<double> inaccurancyList = new List<double>(subrulesList.Count);
    foreach (Rule subrule in subrulesList)
    {
        List<Dictionary<string, string>> subsetOfSubRule = GetSubsetOfRule(subrule.Copy(), dataSet.trainingSet);

        // Both operands are integral, so without the cast the quotient is truncated
        // before it is stored as a double.
        double ratio = (double)GreedyAlgorithm.getUncertaintyOfSubset(subsetOfSubRule, dataSet.decisionHeader) / inaccuracyOfTrainingTable;
        inaccurancyList.Add(ratio);
    }

    // Move the selection to candidate i whenever its ratio is within the threshold and
    // differs from the ratio of the currently selected candidate.
    int k = 0;
    for (int i = 1; i < inaccurancyList.Count; i++)
    {
        if (inaccurancyList[i] != inaccurancyList[k] && inaccurancyList[i] <= inaccurancy)
        {
            k = i;
        }
    }

    return subrulesList[k];
}
/// <summary>
/// Randomly distributes the records of <c>entireSet</c> over <c>trainingSet</c>,
/// <c>validationSet</c> and <c>testSet</c>, retrying until none of the three has an
/// uncertainty of zero.
/// </summary>
/// <remarks>
/// A record goes to the training set when the drawn number is below 0.3, to the validation
/// set when it is below 0.5, and to the test set otherwise. The first attempt appends to
/// the lists as they are on entry; after a rejected attempt all three lists are replaced
/// by new empty lists.
/// </remarks>
/// <param name="random">Source of the random numbers; one value is drawn per record per attempt.</param>
/// <exception cref="InvalidOperationException">No acceptable split was found within the attempt limit.</exception>
private void SplitSet(Random random)
{
    const double trainingBound = 0.3;
    const double validationBound = 0.5;

    // Retrying by recursion is unbounded: when no acceptable split exists the process is
    // terminated by a stack overflow, which cannot be caught. Retry in a bounded loop.
    const int maxAttempts = 10000;

    for (int attempt = 0; attempt < maxAttempts; attempt++)
    {
        foreach (var record in entireSet)
        {
            double rand = random.NextDouble();
            if (rand < trainingBound)
            {
                trainingSet.Add(record);
            }
            else if (rand < validationBound)
            {
                validationSet.Add(record);
            }
            else
            {
                testSet.Add(record);
            }
        }

        bool everySetUsable =
            GreedyAlgorithm.getUncertaintyOfSubset(trainingSet, decisionHeader) != 0
            && GreedyAlgorithm.getUncertaintyOfSubset(validationSet, decisionHeader) != 0
            && GreedyAlgorithm.getUncertaintyOfSubset(testSet, decisionHeader) != 0;
        if (everySetUsable)
        {
            return;
        }

        // Discard the rejected split before the next attempt.
        trainingSet = new List<Dictionary<string, string>>();
        validationSet = new List<Dictionary<string, string>>();
        testSet = new List<Dictionary<string, string>>();
    }

    throw new InvalidOperationException(
        "Could not split the data set so that the training, validation and test sets all have non-zero uncertainty.");
}
/// <summary>
/// Builds one candidate rule set per value returned by <c>GetInaccuracyList</c> and
/// returns the candidate chosen by <c>GetBestRuleSet</c>.
/// </summary>
/// <returns>The result of <c>GetBestRuleSet</c> over all candidate rule sets.</returns>
private RuleSet Pruning()
{
    int inaccuracyOfTrainingTable = GreedyAlgorithm.getUncertaintyOfSubset(dataSet.trainingSet, dataSet.decisionHeader);
    List<double> thresholds = GetInaccuracyList(trainRuleSet.GetRuleSet(), inaccuracyOfTrainingTable);

    // One empty rule set per threshold, all created before any of them is filled.
    List<RuleSet> candidates = new List<RuleSet>();
    for (int n = 0; n < thresholds.Count; n++)
    {
        candidates.Add(new RuleSet(dataSet.headerRow, dataSet.decisionHeader));
    }

    // Fill candidate number `position` with every trained rule reduced for the
    // threshold at the same position.
    int position = 0;
    foreach (RuleSet candidate in candidates)
    {
        double threshold = thresholds[position];
        foreach (Rule trainedRule in trainRuleSet.GetRuleSet())
        {
            Rule reduced = GetSubruleOfInacurrany(trainedRule.Copy(), threshold, inaccuracyOfTrainingTable);
            candidate.AddRule(reduced);
        }

        position++;
    }

    return GetBestRuleSet(candidates);
}
/// <summary>
/// Collects the distinct relative inaccuracies of every rule in <paramref name="rules"/>
/// and of each shortened version of it, sorted in ascending order.
/// </summary>
/// <remarks>
/// Each rule is copied first; <c>DeleteFirstCondition</c> is then called on the copy until
/// it has no conditions left, with one ratio recorded before every call.
/// </remarks>
/// <param name="rules">Rules to evaluate; the rules themselves are not modified here.</param>
/// <param name="inaccuracyOfTrainingTable">Uncertainty of the training set, used as the divisor of every ratio.</param>
/// <returns>A sorted list with one entry per distinct ratio.</returns>
/// <exception cref="DivideByZeroException">
/// <paramref name="inaccuracyOfTrainingTable"/> is zero and <paramref name="rules"/> is not empty.
/// </exception>
private List<double> GetInaccuracyList(List<Rule> rules, int inaccuracyOfTrainingTable)
{
    // Floating-point division by zero would silently yield NaN/Infinity; keep failing
    // loudly, as the integer division did. With no rules nothing is divided.
    if (rules.Count > 0 && inaccuracyOfTrainingTable == 0)
    {
        throw new DivideByZeroException("The inaccuracy of the training table must not be zero.");
    }

    List<double> inaccuracyList = new List<double>();

    // Tracks the values already recorded so the duplicate check is not a linear scan.
    HashSet<double> seen = new HashSet<double>();

    foreach (Rule rule in rules)
    {
        Rule subrule = rule.Copy();
        do
        {
            List<Dictionary<string, string>> subsetOfSubRule = GetSubsetOfRule(subrule, dataSet.trainingSet);

            // Both operands are integral, so without the cast the quotient is truncated
            // before it is stored as a double.
            double inaccuracy = (double)GreedyAlgorithm.getUncertaintyOfSubset(subsetOfSubRule, dataSet.decisionHeader) / inaccuracyOfTrainingTable;
            if (seen.Add(inaccuracy))
            {
                inaccuracyList.Add(inaccuracy);
            }

            subrule.DeleteFirstCondition();
        }
        while (subrule.conditions.Count > 0);
    }

    inaccuracyList.Sort();
    return inaccuracyList;
}
/// <summary>
/// Randomly distributes the records of <c>entireSet</c> over <c>trainingSet</c> and
/// <c>testSet</c>, retrying until neither has an uncertainty of zero.
/// </summary>
/// <remarks>
/// A record goes to the training set when the drawn number is below
/// <paramref name="percent"/> / 100 and to the test set otherwise. The first attempt
/// appends to the lists as they are on entry; after a rejected attempt both lists are
/// replaced by new empty lists.
/// </remarks>
/// <param name="random">Source of the random numbers; one value is drawn per record per attempt.</param>
/// <param name="percent">Share of records for the training set, expressed in percent.</param>
/// <exception cref="InvalidOperationException">No acceptable split was found within the attempt limit.</exception>
private void SplitSet(Random random, double percent)
{
    // Retrying by recursion is unbounded: when no acceptable split exists the process is
    // terminated by a stack overflow, which cannot be caught. Retry in a bounded loop.
    const int maxAttempts = 10000;

    double threshold = percent / 100;

    for (int attempt = 0; attempt < maxAttempts; attempt++)
    {
        foreach (var record in entireSet)
        {
            double rand = random.NextDouble();
            if (rand < threshold)
            {
                trainingSet.Add(record);
            }
            else
            {
                testSet.Add(record);
            }
        }

        bool bothSetsUsable =
            GreedyAlgorithm.getUncertaintyOfSubset(trainingSet, decisionHeader) != 0
            && GreedyAlgorithm.getUncertaintyOfSubset(testSet, decisionHeader) != 0;
        if (bothSetsUsable)
        {
            return;
        }

        // Discard the rejected split before the next attempt.
        trainingSet = new List<Dictionary<string, string>>();
        testSet = new List<Dictionary<string, string>>();
    }

    throw new InvalidOperationException(
        "Could not split the data set so that the training and test sets both have non-zero uncertainty.");
}