Пример #1
0
 /// <summary>
 /// Builds one rule set per fold of <paramref name="dataSet"/> and records a quality value for each.
 /// </summary>
 /// <remarks>
 /// For every fold index from 0 up to <c>dataSet.numberOfSets</c> the fold's training set is
 /// requested from the data set, a <c>GreedyAlgorithm</c> is constructed on it, and the resulting
 /// rule set is appended to <c>ruleSetList</c> together with the value returned by
 /// <c>GetQualityOfRuleSet</c> in <c>qualityList</c>. Once all folds are processed,
 /// <c>SetAVGQuality</c> and <c>SetBestQuality</c> are invoked.
 /// </remarks>
 /// <param name="dataSet">Data set that supplies the folds, the header row and the decision header.</param>
 public k_foldCrossValidation(DataSetCross dataSet)
 {
     ruleSetList  = new List<RuleSet>();
     qualityList  = new List<double>();
     this.dataSet = dataSet;

     // Folds are handled one after another, so both lists are filled in fold order.
     int fold = 0;
     while (fold < dataSet.numberOfSets)
     {
         RuleSet foldRules = new GreedyAlgorithm(
             dataSet.GetTrainingSet(fold),
             dataSet.headerRow,
             dataSet.decisionHeader).ruleSet;

         ruleSetList.Add(foldRules);
         qualityList.Add(GetQualityOfRuleSet(fold, foldRules));
         fold++;
     }

     SetAVGQuality();
     SetBestQuality();
 }
Пример #2
0
        /// <summary>
        /// Shortens <paramref name="rule"/> by repeatedly removing its first condition and picks one
        /// of the resulting sub-rules based on its relative inaccuracy on the training set.
        /// </summary>
        /// <remarks>
        /// The relative inaccuracy of a sub-rule is the uncertainty of the training rows it selects
        /// divided by <paramref name="inaccuracyOfTrainingTable"/>. The division is carried out in
        /// floating point; dividing the two integers directly would truncate the quotient.
        /// </remarks>
        /// <param name="rule">
        /// Rule to shorten. It is modified in place (its decision value is overwritten and its
        /// conditions are removed), so callers that need the original must pass a copy.
        /// </param>
        /// <param name="inaccurancy">Upper bound on the relative inaccuracy of the chosen sub-rule.</param>
        /// <param name="inaccuracyOfTrainingTable">
        /// Uncertainty of the whole training table, used as the denominator. Expected to be non-zero;
        /// with zero the quotient is NaN or infinity.
        /// </param>
        /// <returns>
        /// The last generated sub-rule whose relative inaccuracy is within the bound and differs from
        /// that of the currently selected one; the unshortened rule if no later sub-rule qualifies.
        /// </returns>
        private Rule GetSubruleOfInacurrany(Rule rule, double inaccurancy, int inaccuracyOfTrainingTable)
        {
            List<Rule> subrulesList = new List<Rule>();

            // Snapshot the rule after each shortening step; index 0 is the full rule.
            do
            {
                rule.decisionValue = GetMostCommonDecision(GetSubsetOfRule(rule, dataSet.trainingSet));
                subrulesList.Add(rule.Copy());
                rule.DeleteFirstCondition();
            } while (rule.conditions.Count > 0);

            List<double> inaccurancyList = new List<double>(subrulesList.Count);

            foreach (Rule subrule in subrulesList)
            {
                List<Dictionary<string, string>> subsetOfSubRule = GetSubsetOfRule(subrule.Copy(), dataSet.trainingSet);

                // Cast before dividing: both operands are integers, and integer division would
                // collapse almost every ratio to 0.
                double relativeInaccuracy =
                    (double)GreedyAlgorithm.getUncertaintyOfSubset(subsetOfSubRule, dataSet.decisionHeader) / inaccuracyOfTrainingTable;
                inaccurancyList.Add(relativeInaccuracy);
            }

            int selected = 0;

            for (int i = 1; i < inaccurancyList.Count; i++)
            {
                if (inaccurancyList[i] != inaccurancyList[selected] && inaccurancyList[i] <= inaccurancy)
                {
                    selected = i;
                }
            }
            return subrulesList[selected];
        }
Пример #3
0
 /// <summary>
 /// Randomly distributes the records of <c>entireSet</c> over the training, validation and
 /// test sets and repeats the draw until none of the three parts has zero uncertainty.
 /// </summary>
 /// <remarks>
 /// Each record draws a uniform number in [0, 1): below 0.3 it goes to the training set, below
 /// 0.5 to the validation set, otherwise to the test set. A rejected split is discarded by
 /// replacing all three lists with new empty ones. The retry is a loop rather than recursion,
 /// so the stack does not grow with each re-draw. If no acceptable split exists for the data,
 /// this method does not return.
 /// </remarks>
 /// <param name="random">Random source used for every draw, including re-draws.</param>
 private void SplitSet(Random random)
 {
     const double trainingUpperBound   = 0.3;
     const double validationUpperBound = 0.5;

     while (true)
     {
         foreach (var record in entireSet)
         {
             double rand = random.NextDouble();
             if (rand < trainingUpperBound)
             {
                 trainingSet.Add(record);
             }
             else if (rand < validationUpperBound)
             {
                 validationSet.Add(record);
             }
             else
             {
                 testSet.Add(record);
             }
         }

         // Accept the split only if every part has non-zero uncertainty.
         bool splitIsUsable =
             GreedyAlgorithm.getUncertaintyOfSubset(trainingSet, decisionHeader) != 0 &&
             GreedyAlgorithm.getUncertaintyOfSubset(validationSet, decisionHeader) != 0 &&
             GreedyAlgorithm.getUncertaintyOfSubset(testSet, decisionHeader) != 0;
         if (splitIsUsable)
         {
             return;
         }

         // Start the next attempt from empty lists.
         trainingSet   = new List<Dictionary<string, string>>();
         validationSet = new List<Dictionary<string, string>>();
         testSet       = new List<Dictionary<string, string>>();
     }
 }
Пример #4
0
        /// <summary>
        /// Builds one candidate rule set per inaccuracy value of the trained rules and returns the
        /// candidate chosen by <c>GetBestRuleSet</c>.
        /// </summary>
        /// <remarks>
        /// The inaccuracy values come from <c>GetInaccuracyList</c>. For each value, every trained
        /// rule is copied and passed to <c>GetSubruleOfInacurrany</c> with that value as the bound;
        /// the returned sub-rules form the candidate rule set for that value.
        /// </remarks>
        /// <returns>The rule set selected by <c>GetBestRuleSet</c> from the candidates.</returns>
        private RuleSet Pruning()
        {
            int tableUncertainty = GreedyAlgorithm.getUncertaintyOfSubset(dataSet.trainingSet, dataSet.decisionHeader);
            List<double> thresholds = GetInaccuracyList(trainRuleSet.GetRuleSet(), tableUncertainty);

            // First create an empty candidate for every threshold ...
            List<RuleSet> candidates = new List<RuleSet>();
            for (int n = 0; n < thresholds.Count; n++)
            {
                candidates.Add(new RuleSet(dataSet.headerRow, dataSet.decisionHeader));
            }

            // ... then fill each candidate with the sub-rules allowed by its threshold.
            int position = 0;
            while (position < candidates.Count)
            {
                double threshold = thresholds[position];
                foreach (Rule trainedRule in trainRuleSet.GetRuleSet())
                {
                    Rule shortened = GetSubruleOfInacurrany(trainedRule.Copy(), threshold, tableUncertainty);
                    candidates[position].AddRule(shortened);
                }
                position++;
            }

            return GetBestRuleSet(candidates);
        }
Пример #5
0
        /// <summary>
        /// Collects the distinct relative inaccuracy values of all given rules and of the sub-rules
        /// obtained by repeatedly removing their first condition.
        /// </summary>
        /// <remarks>
        /// The relative inaccuracy is the uncertainty of the training rows selected by a (sub-)rule
        /// divided by <paramref name="inaccuracyOfTrainingTable"/>. The division is carried out in
        /// floating point; dividing the two integers directly would truncate the quotient.
        /// </remarks>
        /// <param name="rules">Rules to evaluate. They are copied before being shortened.</param>
        /// <param name="inaccuracyOfTrainingTable">
        /// Uncertainty of the whole training table, used as the denominator. Expected to be non-zero;
        /// with zero the quotient is NaN or infinity.
        /// </param>
        /// <returns>The distinct relative inaccuracy values in ascending order.</returns>
        private List<double> GetInaccuracyList(List<Rule> rules, int inaccuracyOfTrainingTable)
        {
            List<double> inaccuracyList = new List<double>();

            // Tracks values already collected so de-duplication does not rescan the list.
            HashSet<double> seen = new HashSet<double>();

            foreach (Rule rule in rules)
            {
                Rule subrule = rule.Copy();
                do
                {
                    List<Dictionary<string, string>> subsetOfSubRule = GetSubsetOfRule(subrule, dataSet.trainingSet);

                    // Cast before dividing: both operands are integers, and integer division would
                    // collapse almost every ratio to 0.
                    double inaccuracy =
                        (double)GreedyAlgorithm.getUncertaintyOfSubset(subsetOfSubRule, dataSet.decisionHeader) / inaccuracyOfTrainingTable;

                    if (seen.Add(inaccuracy))
                    {
                        inaccuracyList.Add(inaccuracy);
                    }
                    subrule.DeleteFirstCondition();
                } while (subrule.conditions.Count > 0);
            }

            inaccuracyList.Sort();
            return inaccuracyList;
        }
Пример #6
0
        /// <summary>
        /// Randomly distributes the records of <c>entireSet</c> over the training and test sets and
        /// repeats the draw until neither part has zero uncertainty.
        /// </summary>
        /// <remarks>
        /// Each record draws a uniform number in [0, 1) and goes to the training set when the number
        /// is below <paramref name="percent"/> / 100, otherwise to the test set. A rejected split is
        /// discarded by replacing both lists with new empty ones. The retry is a loop rather than
        /// recursion, so the stack does not grow with each re-draw. If no acceptable split exists
        /// for the data and the given percentage, this method does not return.
        /// </remarks>
        /// <param name="random">Random source used for every draw, including re-draws.</param>
        /// <param name="percent">Share of records, in percent, that should go to the training set.</param>
        private void SplitSet(Random random, double percent)
        {
            double threshold = percent / 100;

            while (true)
            {
                foreach (var record in entireSet)
                {
                    if (random.NextDouble() < threshold)
                    {
                        trainingSet.Add(record);
                    }
                    else
                    {
                        testSet.Add(record);
                    }
                }

                // Accept the split only if both parts have non-zero uncertainty.
                bool splitIsUsable =
                    GreedyAlgorithm.getUncertaintyOfSubset(trainingSet, decisionHeader) != 0 &&
                    GreedyAlgorithm.getUncertaintyOfSubset(testSet, decisionHeader) != 0;
                if (splitIsUsable)
                {
                    return;
                }

                // Start the next attempt from empty lists.
                trainingSet = new List<Dictionary<string, string>>();
                testSet     = new List<Dictionary<string, string>>();
            }
        }
Пример #7
0
        /// <summary>
        /// Constructs a <c>GreedyAlgorithm</c> on the training set of <c>dataSet</c> and stores
        /// its rule set in <c>trainRuleSet</c>.
        /// </summary>
        protected void Train()
        {
            trainRuleSet = new GreedyAlgorithm(
                dataSet.trainingSet,
                dataSet.headerRow,
                dataSet.decisionHeader).ruleSet;
        }