/// <summary>
/// Builds a decision tree over <paramref name="items"/>. Each item is wrapped in a descriptor
/// that records the item, its position in <paramref name="items"/>, and the criteria reported
/// for it by <paramref name="classifier"/>; the root node is then generated from those descriptors.
/// </summary>
/// <param name="items">The items to place in the tree.</param>
/// <param name="classifier">
/// Supplies the criteria for each item and the comparer used for criterion values.
/// </param>
/// <returns>The root node of the generated tree.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="items"/> or <paramref name="classifier"/> is <c>null</c>.
/// </exception>
public static DecisionTreeNode<TItem> GenerateTree(IReadOnlyList<TItem> items, IClassifier<TItem> classifier)
{
    // Fail fast with a descriptive exception instead of a NullReferenceException further down.
    if (items == null)
    {
        throw new ArgumentNullException(nameof(items));
    }

    if (classifier == null)
    {
        throw new ArgumentNullException(nameof(classifier));
    }

    var itemCount = items.Count;

    // Presize the list: exactly one descriptor is produced per input item.
    var itemDescriptors = new List<ItemDescriptor<TItem>>(itemCount);
    for (var i = 0; i < itemCount; i++)
    {
        var item = items[i];
        itemDescriptors.Add(new ItemDescriptor<TItem>()
        {
            Criteria = classifier.GetCriteria(item),
            Index = i,
            Item = item,
        });
    }

    var comparer = new DecisionCriterionValueEqualityComparer(classifier.ValueComparer);
    return GenerateNode(
        new TreeBuilderContext(),
        comparer,
        itemDescriptors);
}
/// <summary>
/// Builds a single tree node from the items that reached this branch, recursing into a child
/// node for every criterion value that leads to at least one item not yet recorded in
/// <c>context.MatchedItems</c>.
/// </summary>
/// <param name="context">
/// Carries the criteria already processed on this branch (<c>CurrentCriteria</c>) and the set of
/// items already recorded (<c>MatchedItems</c>).
/// </param>
/// <param name="comparer">
/// Comparer handed to every <c>Criterion</c>; its <c>InnerComparer</c> keys the branch dictionaries.
/// </param>
/// <param name="items">The working set of item descriptors for this node.</param>
/// <returns>A node holding the items accepted here and the criteria that lead to child nodes.</returns>
private static DecisionTreeNode<TItem> GenerateNode(
    TreeBuilderContext context,
    DecisionCriterionValueEqualityComparer comparer,
    List<ItemDescriptor<TItem>> items)
{
    // Generics are used heavily here on purpose, to cut down on intermediate wrapper-class
    // allocations. Performance testing found that building these trees allocates significant
    // memory that can be avoided, and that it has a real impact on startup.
    var criteriaByKey = new Dictionary<string, Criterion>(StringComparer.OrdinalIgnoreCase);

    // Items with no remaining criteria are considered accepted at this point in the tree.
    var accepted = new List<TItem>();

    // Map every item in the working set to its criterion/value pairings; the resulting
    // tree is reduced to the minimal set further below.
    foreach (var descriptor in items)
    {
        var fullySatisfied = true;
        foreach (var pair in descriptor.Criteria)
        {
            // context.CurrentCriteria is the logical 'stack' of criteria already processed on
            // this branch of the tree; anything on it needs no further handling.
            if (!context.CurrentCriteria.Contains(pair.Key))
            {
                fullySatisfied = false;

                if (!criteriaByKey.TryGetValue(pair.Key, out var valueBranches))
                {
                    valueBranches = new Criterion(comparer);
                    criteriaByKey.Add(pair.Key, valueBranches);
                }

                if (!valueBranches.TryGetValue(pair.Value, out var bucket))
                {
                    bucket = new List<ItemDescriptor<TItem>>();
                    valueBranches.Add(pair.Value, bucket);
                }

                bucket.Add(descriptor);
            }
        }

        // Every criterion on the item is already covered by the 'stack', so the item matches here.
        if (fullySatisfied)
        {
            accepted.Add(descriptor.Item);
        }
    }

    // Walk the criteria from most to fewest distinct values to decide which to explore next.
    // A criterion with no 'new' matches under it is dropped from the tree entirely.
    var keptCriteria = new List<DecisionCriterion<TItem>>();
    var byBranchiness = criteriaByKey.OrderByDescending(candidate => candidate.Value.Count);
    foreach (var entry in byBranchiness)
    {
        var childNodes = new Dictionary<object, DecisionTreeNode<TItem>>(comparer.InnerComparer);
        foreach (var valueBranch in entry.Value)
        {
            // Every item must be offered to MatchedItems, so the non-short-circuiting '|='
            // is deliberate: Add runs for each item even once the flag is already set.
            var recordedNewItem = false;
            foreach (var descriptor in valueBranch.Value)
            {
                recordedNewItem |= context.MatchedItems.Add(descriptor);
            }

            if (!recordedNewItem)
            {
                continue;
            }

            // The child context is created from the current one, with this criterion pushed
            // onto its 'stack' before recursing.
            var branchContext = new TreeBuilderContext(context);
            branchContext.CurrentCriteria.Add(entry.Key);

            var childNode = GenerateNode(branchContext, comparer, valueBranch.Value);
            childNodes.Add(valueBranch.Key.Value, childNode);
        }

        if (childNodes.Count == 0)
        {
            continue;
        }

        keptCriteria.Add(new DecisionCriterion<TItem>()
        {
            Key = entry.Key,
            Branches = childNodes,
        });
    }

    return new DecisionTreeNode<TItem>()
    {
        Criteria = keptCriteria,
        Matches = accepted,
    };
}
/// <summary>
/// Initializes the criterion by forwarding <paramref name="comparer"/> to the base-class constructor.
/// </summary>
/// <param name="comparer">
/// The comparer passed to the base constructor.
/// NOTE(review): the base type is declared outside this view; presumably a dictionary keyed by
/// criterion value, with this comparer deciding key equality. Confirm against the class header.
/// </param>
public Criterion(DecisionCriterionValueEqualityComparer comparer) : base(comparer) { }