/// <summary>
/// Builds <c>TreeCount</c> decision trees with <c>unsupervisedDecisionTreeBuilder</c>, letting each
/// feature-selection step draw a random subset (without repetition) of the non-class features, and
/// collects the simplified patterns that pass <c>EPTester</c>. Afterwards every collected pattern
/// gets a single count/support computed over all <paramref name="instances"/>.
/// </summary>
/// <param name="model">Model whose features, minus <paramref name="classFeature"/>, are the sampling candidates.</param>
/// <param name="instances">Instances used to build the trees and to count pattern matches.</param>
/// <param name="classFeature">Feature excluded from the candidates and handed to the builder, extractor and tester.</param>
/// <returns>
/// The mined patterns in tree order. A <c>null</c> entry is appended after the patterns of each tree,
/// so consumers must skip <c>null</c> elements.
/// </returns>
public IEnumerable <IEmergingPattern> Mine(InstanceModel model, IEnumerable <Instance> instances, Feature classFeature)
        {
            EmergingPatternCreator     EpCreator  = new EmergingPatternCreator();
            IEmergingPatternSimplifier simplifier = new EmergingPatternSimplifier(new ItemComparer());

            List <Feature> featuresToConsider = model.Features.Where(f => f != classFeature).ToList();

            // Unless FeatureCount is configured (-1 means "not set"), sample (int)log2(n) + 1 features.
            int featureCount   = (FeatureCount != -1) ? FeatureCount : (int)Math.Log(featuresToConsider.Count, 2) + 1;
            var resultPatterns = new List <IEmergingPattern>();

            // The sequence is walked once per tree and once per mined pattern below. Materialize it a
            // single time so a lazy or non-repeatable source is neither re-evaluated nor inconsistent
            // between passes. Reassigning the parameter keeps its static type for all later calls.
            instances = instances as ICollection <Instance> ?? instances.ToList();
            var instanceCount = instances.Count();

            for (int i = 0; i < TreeCount; i++)
            {
                unsupervisedDecisionTreeBuilder.OnSelectingFeaturesToConsider =
                    (features, level) => _sampler.SampleWithoutRepetition(featuresToConsider, featureCount);

                DecisionTree           tree           = unsupervisedDecisionTreeBuilder.Build(model, instances, classFeature);
                DecisionTreeClassifier treeClassifier = new DecisionTreeClassifier(tree);

                // A tree with a single leaf has no split, so no patterns are extracted from it.
                if (treeClassifier.DecisionTree.Leaves > 1)
                {
                    EpCreator.ExtractPatterns(
                        treeClassifier,
                        (EmergingPattern p) =>
                        {
                            if (EPTester.Test(p.Counts, model, classFeature))
                            {
                                resultPatterns.Add(simplifier.Simplify(p));
                            }
                        },
                        classFeature);
                }

                // Marks the end of this tree's patterns in the result list.
                resultPatterns.Add(null);
            }

            foreach (var ep in resultPatterns)
            {
                if (ep == null)
                {
                    continue; // tree separator, nothing to recount
                }

                // Replace the counts taken from the tree with one overall match count.
                ep.Counts = new double[1];
                foreach (var instance in instances)
                {
                    if (ep.IsMatch(instance))
                    {
                        ep.Counts[0]++;
                    }
                }

                ep.Supports    = new double[1];
                ep.Supports[0] = ep.Counts[0] / instanceCount;
            }

            return(resultPatterns);
        }
示例#2
0
        /// <summary>
        /// Mines patterns through <c>DoMine</c> and returns the simplified ones accepted by <c>EPTester</c>.
        /// When <c>MinePatternsWhileBuildingTree</c> is set, patterns are gathered from the builder's
        /// split-evaluation callback and <c>DoMine</c> receives no action; otherwise they are gathered
        /// from the action passed to <c>DoMine</c>.
        /// </summary>
        /// <param name="model">Model forwarded to the tester, the pattern creator and <c>DoMine</c>.</param>
        /// <param name="instances">Instances forwarded to <c>DoMine</c>.</param>
        /// <param name="classFeature">Class feature forwarded to the tester, the pattern creator and <c>DoMine</c>.</param>
        /// <returns>The list of accepted, simplified patterns.</returns>
        public IEnumerable <IEmergingPattern> Mine(InstanceModel model, IEnumerable <Instance> instances, Feature classFeature)
        {
            EmergingPatternCreator creator = new EmergingPatternCreator();

            // The item comparer used for simplification depends on the Multivariate flag.
            IEmergingPatternSimplifier simplifier = Multivariate
                ? new EmergingPatternSimplifier(new MultivariateItemComparer())
                : new EmergingPatternSimplifier(new ItemComparer());

            List <IEmergingPattern> accepted = new List <IEmergingPattern>();

            if (MinePatternsWhileBuildingTree)
            {
                DecisionTreeBuilder.OnSplitEvaluation =
                    (IDecisionTreeNode node, ISplitIterator iterator, List <SelectorContext> currentContext) =>
                {
                    // Created lazily: only needed once some child distribution passes the test.
                    IChildSelector selector = null;

                    for (int child = 0; child < iterator.CurrentDistribution.Length; child++)
                    {
                        double[] counts = iterator.CurrentDistribution[child];
                        if (!EPTester.Test(counts, model, classFeature))
                        {
                            continue;
                        }

                        selector = selector ?? iterator.CreateCurrentChildSelector();

                        EmergingPattern candidate = creator.ExtractPattern(currentContext, model, classFeature,
                                                                           selector, child);

                        // Store a copy so the pattern does not share the iterator's array.
                        candidate.Counts = (double[])counts.Clone();
                        accepted.Add(simplifier.Simplify(candidate));
                    }
                };

                DoMine(model, instances, classFeature, creator, null);
                return(accepted);
            }

            DoMine(model, instances, classFeature, creator, candidate =>
            {
                if (EPTester.Test(candidate.Counts, model, classFeature))
                {
                    accepted.Add(simplifier.Simplify(candidate));
                }
            });

            return(accepted);
        }
        /// <summary>
        /// Variant of the tree-based miner in which the selection weight of each candidate feature is
        /// recomputed before every tree from how often the feature appeared in patterns accepted so far:
        /// a feature's weight is <c>(max - useCount) / max</c>, where <c>max</c> is the highest use count.
        /// The normalized cumulative weights are stored in <c>cumulativeProbabilities</c> before
        /// <c>SampleWithDistribution</c> is invoked (presumably it reads that field; its body is not
        /// visible here).
        /// </summary>
        /// <param name="model">Model whose features, minus <paramref name="classFeature"/>, are the sampling candidates.</param>
        /// <param name="instances">Instances used to build the trees and to count pattern matches.</param>
        /// <param name="classFeature">Feature excluded from the candidates and handed to the builder, extractor and tester.</param>
        /// <returns>
        /// The mined patterns in tree order. A <c>null</c> entry is appended after the patterns of each tree,
        /// so consumers must skip <c>null</c> elements.
        /// </returns>
        public IEnumerable <IEmergingPattern> MineTest(InstanceModel model, IEnumerable <Instance> instances, Feature classFeature)
        {
            EmergingPatternCreator     EpCreator  = new EmergingPatternCreator();
            IEmergingPatternSimplifier simplifier = new EmergingPatternSimplifier(new ItemComparer());

            List <Feature> featuresToConsider = model.Features.Where(f => f != classFeature).ToList();

            // Unless FeatureCount is configured (-1 means "not set"), use max(log2(n) + 1, 0.63 * n), truncated.
            int featureCount   = (FeatureCount != -1) ? FeatureCount : (int)Math.Max(Math.Log(featuresToConsider.Count, 2) + 1, 0.63 * featuresToConsider.Count);
            var resultPatterns = new List <IEmergingPattern>();

            featureUseCount = new Dictionary <Feature, int>();
            foreach (var feature in featuresToConsider)
            {
                featureUseCount.Add(feature, 0);
            }

            allFeaturesUseCount = 0;

            // The sequence is walked once per tree and once per mined pattern below. Materialize it a
            // single time so a lazy or non-repeatable source is neither re-evaluated nor inconsistent
            // between passes. Reassigning the parameter keeps its static type for all later calls.
            instances = instances as ICollection <Instance> ?? instances.ToList();
            var instanceCount = instances.Count();

            for (int i = 0; i < TreeCount; i++)
            {
                double max = 0;
                foreach (var feature in featuresToConsider)
                {
                    max = Math.Max(max, featureUseCount[feature]);
                }

                // Store the running total directly, so each entry is the cumulative weight up to and
                // including its feature and the entries are consistent by construction.
                cumulativeProbabilities = new List <double>();
                double sum = 0;
                foreach (var feature in featuresToConsider)
                {
                    sum += allFeaturesUseCount == 0
                        ? 1.0 / featuresToConsider.Count
                        : 1.0 * (max - featureUseCount[feature]) / max;

                    cumulativeProbabilities.Add(sum);
                }

                // Normalize so the last entry is 1. When every feature has been used equally often all
                // weights are zero; dividing by that zero total would yield NaN, so fall back to a
                // uniform distribution (the same one used before any feature has been counted).
                for (int j = 0; j < cumulativeProbabilities.Count; j++)
                {
                    cumulativeProbabilities[j] = sum > 0
                        ? cumulativeProbabilities[j] / sum
                        : (j + 1.0) / cumulativeProbabilities.Count;
                }

                unsupervisedDecisionTreeBuilder.OnSelectingFeaturesToConsider =
                    (features, level) => SampleWithDistribution(featuresToConsider, featureCount);

                DecisionTree           tree           = unsupervisedDecisionTreeBuilder.Build(model, instances, classFeature);
                DecisionTreeClassifier treeClassifier = new DecisionTreeClassifier(tree);

                // A tree with a single leaf has no split, so no patterns are extracted from it.
                if (treeClassifier.DecisionTree.Leaves > 1)
                {
                    EpCreator.ExtractPatterns(
                        treeClassifier,
                        (EmergingPattern p) =>
                        {
                            if (EPTester.Test(p.Counts, model, classFeature))
                            {
                                // Usage is counted on the pattern as extracted, before simplification.
                                foreach (Item item in p.Items)
                                {
                                    featureUseCount[item.Feature]++;
                                    allFeaturesUseCount++;
                                }

                                resultPatterns.Add(simplifier.Simplify(p));
                            }
                        },
                        classFeature);
                }

                // Marks the end of this tree's patterns in the result list.
                resultPatterns.Add(null);
            }

            foreach (var ep in resultPatterns)
            {
                if (ep == null)
                {
                    continue; // tree separator, nothing to recount
                }

                // Replace the counts taken from the tree with one overall match count.
                ep.Counts = new double[1];
                foreach (var instance in instances)
                {
                    if (ep.IsMatch(instance))
                    {
                        ep.Counts[0]++;
                    }
                }

                ep.Supports    = new double[1];
                ep.Supports[0] = ep.Counts[0] / instanceCount;
            }

            return(resultPatterns);
        }
示例#4
0
 /// <summary>
 /// Mining step to be implemented by derived classes.
 /// </summary>
 /// <param name="model">Instance model describing the data.</param>
 /// <param name="instances">Instances to mine from.</param>
 /// <param name="classFeature">Feature treated as the class.</param>
 /// <param name="epCreator">Pattern creator supplied by the caller.</param>
 /// <param name="action">
 /// Callback receiving an <c>EmergingPattern</c>. NOTE(review): a visible <c>Mine</c> caller passes
 /// <c>null</c> here when it collects patterns through another callback, so implementations
 /// presumably have to tolerate a null action; confirm against the implementations.
 /// </param>
 protected abstract void DoMine(InstanceModel model, IEnumerable <Instance> instances, Feature classFeature,
                                EmergingPatternCreator epCreator, Action <EmergingPattern> action);