/// <summary>
/// Builds <c>TreeCount</c> decision trees, each time letting the builder pick its candidate
/// features from a random sample (without repetition) of the non-class features, keeps the
/// patterns extracted from each tree that pass <c>EPTester</c>, and finally recomputes every
/// kept pattern's count and support against the whole of <paramref name="instances"/>.
/// </summary>
/// <param name="model">Model whose features, other than <paramref name="classFeature"/>, are the sampling candidates.</param>
/// <param name="instances">
/// Instances used to build every tree and to recount pattern matches. The sequence is
/// materialized once, so a deferred query is evaluated a single time.
/// </param>
/// <param name="classFeature">Feature excluded from the candidates and handed to the builder, tester and pattern extractor.</param>
/// <returns>
/// The simplified patterns of all trees in build order. A <c>null</c> entry is appended after
/// each tree (even when the tree contributed no pattern), so consumers must expect
/// <c>null</c> elements. Each non-null pattern carries a single-element <c>Counts</c> (number of
/// matching instances) and <c>Supports</c> (that count divided by the instance count, or 0
/// when there are no instances).
/// </returns>
public IEnumerable<IEmergingPattern> Mine(InstanceModel model, IEnumerable<Instance> instances, Feature classFeature)
{
    EmergingPatternCreator EpCreator = new EmergingPatternCreator();
    IEmergingPatternSimplifier simplifier = new EmergingPatternSimplifier(new ItemComparer());

    List<Feature> featuresToConsider = model.Features.Where(f => f != classFeature).ToList();

    // FeatureCount == -1 selects the default sample size: truncated log2(n) + 1.
    int featureCount = (FeatureCount != -1)
        ? FeatureCount
        : (int)Math.Log(featuresToConsider.Count, 2) + 1;

    // Materialize once: the sequence is consumed by every tree build and again for every
    // pattern recount, so a deferred query would otherwise be re-evaluated each time (and
    // could even yield different instances between passes). The variable keeps the
    // IEnumerable<Instance> static type so the builder is called exactly as before.
    IEnumerable<Instance> stableInstances = (instances as ICollection<Instance>) ?? instances.ToList();
    int instanceCount = stableInstances.Count();

    var resultPatterns = new List<IEmergingPattern>();

    for (int i = 0; i < TreeCount; i++)
    {
        // Assigned before each build; every request from the builder draws a fresh sample.
        unsupervisedDecisionTreeBuilder.OnSelectingFeaturesToConsider =
            (features, level) => _sampler.SampleWithoutRepetition(featuresToConsider, featureCount);

        DecisionTree tree = unsupervisedDecisionTreeBuilder.Build(model, stableInstances, classFeature);
        DecisionTreeClassifier treeClassifier = new DecisionTreeClassifier(tree);

        // A tree with a single leaf never split, so there is nothing to extract from it.
        if (treeClassifier.DecisionTree.Leaves > 1)
        {
            EpCreator.ExtractPatterns(
                treeClassifier,
                delegate(EmergingPattern p)
                {
                    if (EPTester.Test(p.Counts, model, classFeature))
                    {
                        resultPatterns.Add(simplifier.Simplify(p));
                    }
                },
                classFeature);
        }

        // One null entry per tree, appended after that tree's patterns.
        // NOTE(review): presumably a tree-boundary marker for consumers - confirm with callers.
        resultPatterns.Add(null);
    }

    // Replace the per-tree statistics with a single count/support over all instances.
    foreach (var ep in resultPatterns)
    {
        if (ep == null)
        {
            continue; // per-tree null entry added above
        }

        ep.Counts = new double[1];
        foreach (var instance in stableInstances)
        {
            if (ep.IsMatch(instance))
            {
                ep.Counts[0]++;
            }
        }

        ep.Supports = new double[1];
        // Guard the empty case: 0 / 0 would otherwise store NaN as the support.
        ep.Supports[0] = instanceCount == 0 ? 0.0 : ep.Counts[0] / instanceCount;
    }

    return resultPatterns;
}
/// <summary>
/// Collects the simplified emerging patterns that pass <c>EPTester</c>. Depending on
/// <c>MinePatternsWhileBuildingTree</c> the candidates are gathered either from every split
/// evaluation reported by <c>DecisionTreeBuilder</c>, or from the callback handed to
/// <c>DoMine</c>.
/// </summary>
/// <param name="model">Model passed through to the tester, the pattern creator and <c>DoMine</c>.</param>
/// <param name="instances">Instances passed through to <c>DoMine</c>.</param>
/// <param name="classFeature">Class feature passed through to the tester, the pattern creator and <c>DoMine</c>.</param>
/// <returns>The list of simplified patterns that passed the test, in the order they were reported.</returns>
public IEnumerable<IEmergingPattern> Mine(InstanceModel model, IEnumerable<Instance> instances, Feature classFeature)
{
    EmergingPatternCreator patternCreator = new EmergingPatternCreator();

    // The item comparer used for simplification depends on the Multivariate setting.
    IEmergingPatternSimplifier patternSimplifier = Multivariate
        ? new EmergingPatternSimplifier(new MultivariateItemComparer())
        : new EmergingPatternSimplifier(new ItemComparer());

    var minedPatterns = new List<IEmergingPattern>();

    if (!MinePatternsWhileBuildingTree)
    {
        // Patterns are delivered through the DoMine callback; keep the ones that pass the test.
        DoMine(model, instances, classFeature, patternCreator, candidate =>
        {
            if (EPTester.Test(candidate.Counts, model, classFeature))
            {
                minedPatterns.Add(patternSimplifier.Simplify(candidate));
            }
        });

        return minedPatterns;
    }

    // Inspect every evaluated split: each child distribution that passes the test becomes a
    // pattern built from the current context plus the split's child selector.
    DecisionTreeBuilder.OnSplitEvaluation =
        (IDecisionTreeNode node, ISplitIterator iterator, List<SelectorContext> currentContext) =>
        {
            // Created at most once per split, and only if some child passes the test.
            IChildSelector selector = null;

            for (int childIndex = 0; childIndex < iterator.CurrentDistribution.Length; childIndex++)
            {
                double[] childDistribution = iterator.CurrentDistribution[childIndex];
                if (!EPTester.Test(childDistribution, model, classFeature))
                {
                    continue;
                }

                if (selector == null)
                {
                    selector = iterator.CreateCurrentChildSelector();
                }

                EmergingPattern pattern = patternCreator.ExtractPattern(
                    currentContext, model, classFeature, selector, childIndex);

                // Clone so the pattern keeps its own copy of the distribution array.
                pattern.Counts = (double[])childDistribution.Clone();
                minedPatterns.Add(patternSimplifier.Simplify(pattern));
            }
        };

    // No callback here: the patterns come from the split-evaluation handler above.
    DoMine(model, instances, classFeature, patternCreator, null);

    return minedPatterns;
}
/// <summary>
/// Builds <c>TreeCount</c> decision trees like the random-sampling miner, but draws the
/// candidate features through <c>SampleWithDistribution</c>. Before each tree the
/// <c>cumulativeProbabilities</c> field is rebuilt so that features already used by accepted
/// patterns get a lower weight: <c>(max - useCount) / max</c>, where <c>max</c> is the highest
/// use count so far. Accepted patterns are simplified and finally recounted against the whole
/// of <paramref name="instances"/>.
/// </summary>
/// <remarks>
/// Resets and updates the <c>featureUseCount</c>, <c>allFeaturesUseCount</c> and
/// <c>cumulativeProbabilities</c> fields.
/// NOTE(review): <c>SampleWithDistribution</c> is presumably the consumer of
/// <c>cumulativeProbabilities</c>; it is not visible here - confirm.
/// </remarks>
/// <param name="model">Model whose features, other than <paramref name="classFeature"/>, are the sampling candidates.</param>
/// <param name="instances">
/// Instances used to build every tree and to recount pattern matches. The sequence is
/// materialized once, so a deferred query is evaluated a single time.
/// </param>
/// <param name="classFeature">Feature excluded from the candidates and handed to the builder, tester and pattern extractor.</param>
/// <returns>
/// The simplified patterns of all trees in build order. A <c>null</c> entry is appended after
/// each tree (even when the tree contributed no pattern), so consumers must expect
/// <c>null</c> elements. Each non-null pattern carries a single-element <c>Counts</c> and
/// <c>Supports</c> computed over all instances (support is 0 when there are no instances).
/// </returns>
public IEnumerable<IEmergingPattern> MineTest(InstanceModel model, IEnumerable<Instance> instances, Feature classFeature)
{
    EmergingPatternCreator EpCreator = new EmergingPatternCreator();
    IEmergingPatternSimplifier simplifier = new EmergingPatternSimplifier(new ItemComparer());

    List<Feature> featuresToConsider = model.Features.Where(f => f != classFeature).ToList();
    int candidateCount = featuresToConsider.Count;

    // FeatureCount == -1 selects the default sample size: max(log2(n) + 1, 0.63 * n), truncated.
    int featureCount = (FeatureCount != -1)
        ? FeatureCount
        : (int)Math.Max(Math.Log(candidateCount, 2) + 1, 0.63 * candidateCount);

    var resultPatterns = new List<IEmergingPattern>();

    // Usage statistics start from scratch on every call.
    featureUseCount = new Dictionary<Feature, int>();
    foreach (var feature in featuresToConsider)
    {
        featureUseCount.Add(feature, 0);
    }
    allFeaturesUseCount = 0;

    // Materialize once: the sequence is consumed by every tree build and again for every
    // pattern recount, so a deferred query would otherwise be re-evaluated each time. The
    // variable keeps the IEnumerable<Instance> static type so the builder is called as before.
    IEnumerable<Instance> stableInstances = (instances as ICollection<Instance>) ?? instances.ToList();
    int instanceCount = stableInstances.Count();

    for (int i = 0; i < TreeCount; i++)
    {
        // Highest use count so far; the most used feature(s) get weight 0.
        double max = 0;
        for (int j = 0; j < candidateCount; j++)
        {
            if (featureUseCount[featuresToConsider[j]] > max)
            {
                max = featureUseCount[featuresToConsider[j]];
            }
        }

        // Raw sampling weights: uniform until some feature has been used, afterwards
        // proportional to how far a feature lags behind the most used one.
        var weights = new List<double>(candidateCount);
        double sum = 0;
        for (int j = 0; j < candidateCount; j++)
        {
            double weight = allFeaturesUseCount == 0
                ? 1.0 / candidateCount
                : 1.0 * (max - featureUseCount[featuresToConsider[j]]) / max;
            weights.Add(weight);
            sum += weight;
        }

        // When every feature has the same use count (always the case with a single candidate
        // once it has been used) all weights are 0. Normalizing by that zero sum would turn
        // every cumulative probability into NaN, so fall back to a uniform distribution.
        if (sum <= 0)
        {
            for (int j = 0; j < candidateCount; j++)
            {
                weights[j] = 1.0;
            }
            sum = candidateCount;
        }

        // Normalized cumulative distribution, built from one running total. Deriving the
        // prefix sums from a single accumulator removes the need to cross-check two
        // separately accumulated doubles with an exact equality test.
        var cumulative = new List<double>(candidateCount);
        double running = 0;
        for (int j = 0; j < candidateCount; j++)
        {
            running += weights[j];
            cumulative.Add(running / sum);
        }
        cumulativeProbabilities = cumulative;

        // Assigned before each build; every request from the builder draws a fresh sample.
        unsupervisedDecisionTreeBuilder.OnSelectingFeaturesToConsider =
            (features, level) => SampleWithDistribution(featuresToConsider, featureCount);

        DecisionTree tree = unsupervisedDecisionTreeBuilder.Build(model, stableInstances, classFeature);
        DecisionTreeClassifier treeClassifier = new DecisionTreeClassifier(tree);

        // A tree with a single leaf never split, so there is nothing to extract from it.
        if (treeClassifier.DecisionTree.Leaves > 1)
        {
            EpCreator.ExtractPatterns(
                treeClassifier,
                delegate(EmergingPattern p)
                {
                    if (EPTester.Test(p.Counts, model, classFeature))
                    {
                        // Usage is counted on the items of the pattern as extracted,
                        // i.e. before simplification.
                        foreach (Item item in p.Items)
                        {
                            featureUseCount[item.Feature]++;
                            allFeaturesUseCount++;
                        }
                        resultPatterns.Add(simplifier.Simplify(p));
                    }
                },
                classFeature);
        }

        // One null entry per tree, appended after that tree's patterns.
        // NOTE(review): presumably a tree-boundary marker for consumers - confirm with callers.
        resultPatterns.Add(null);
    }

    // Replace the per-tree statistics with a single count/support over all instances.
    foreach (var ep in resultPatterns)
    {
        if (ep == null)
        {
            continue; // per-tree null entry added above
        }

        ep.Counts = new double[1];
        foreach (var instance in stableInstances)
        {
            if (ep.IsMatch(instance))
            {
                ep.Counts[0]++;
            }
        }

        ep.Supports = new double[1];
        // Guard the empty case: 0 / 0 would otherwise store NaN as the support.
        ep.Supports[0] = instanceCount == 0 ? 0.0 : ep.Counts[0] / instanceCount;
    }

    return resultPatterns;
}