/// <summary>
/// Collects the item sets found for <paramref name="item"/> by running the
/// recursive overload from the tree root (<c>mRoot</c>).
/// </summary>
/// <param name="item">The item the recursive search is matched against.</param>
/// <param name="getMinItemSetSupport">Handle forwarded unchanged to the recursive overload.</param>
/// <returns>A newly created list holding whatever the recursive overload selected.</returns>
public List<ItemSet<T>> MinePatternsContaining(T item, GetMinSupportHandle getMinItemSetSupport)
{
    var matches = new List<ItemSet<T>>();
    MinePatternsContaining(mRoot, matches, item, getMinItemSetSupport);
    return matches;
}
/// <summary>
/// Mines item sets from <paramref name="database"/> using an <c>FPTree</c>:
/// counts each domain item, keeps the items that pass the threshold check,
/// inserts every transaction's surviving items (most frequent first) into the
/// tree, then mines the tree item by item from the least frequent upwards.
/// </summary>
/// <param name="database">Transactions to mine. Enumerated twice (once for counting, once for tree construction).</param>
/// <param name="domain">The items to consider.</param>
/// <param name="getMinItemSetSupport">Handle returning the minimum support for a given item set.</param>
/// <returns>All item sets returned by the tree for the kept items.</returns>
public ItemSets<T> MinePatterns(IEnumerable<Transaction<T>> database, IList<T> domain, GetMinSupportHandle getMinItemSetSupport)
{
    // Pass 1: for every domain item, count the transactions that contain it.
    Dictionary<T, int> counts = new Dictionary<T, int>();
    for (int i = 0; i < domain.Count; ++i)
    {
        counts[domain[i]] = 0;
    }

    foreach (Transaction<T> transaction in database)
    {
        for (int i = 0; i < domain.Count; ++i)
        {
            if (transaction.ContainsItem(domain[i]))
            {
                counts[domain[i]]++;
            }
        }
    }

    // Keep the items whose count passes the threshold. The handle is evaluated
    // exactly once per item (a leftover unused local used to call it twice).
    // NOTE(review): the raw transaction count is compared with the handle's
    // return value without scaling by the database size - confirm this is intended.
    List<T> freqItems = new List<T>();
    for (int i = 0; i < domain.Count; ++i)
    {
        if (counts[domain[i]] >= getMinItemSetSupport(new ItemSet<T>() { domain[i] }))
        {
            freqItems.Add(domain[i]);
        }
    }

    // Order the kept items by descending count.
    T[] FList = freqItems.ToArray();
    Array.Sort(FList, (i1, i2) =>
    {
        int comp = counts[i1].CompareTo(counts[i2]);
        return -comp;
    });

    // Pass 2: insert each transaction's kept items, in FList order, into the tree.
    FPTree<T> fpTree = new FPTree<T>();
    int dbSize = 0;
    foreach (Transaction<T> transaction in database)
    {
        dbSize++;

        List<T> orderedFreqItems = new List<T>();
        for (int i = 0; i < FList.Length; ++i)
        {
            if (transaction.ContainsItem(FList[i]))
            {
                orderedFreqItems.Add(FList[i]);
            }
        }

        fpTree.AddOrderedFreqItems(orderedFreqItems);
    }

    fpTree.DbSize = dbSize;

    // Mine from the least frequent item upwards, removing each item from the
    // tree's leaves once its patterns have been collected.
    ItemSets<T> allItemSets = new ItemSets<T>();
    for (int i = FList.Length - 1; i >= 0; i--)
    {
        T item = FList[i];
        List<ItemSet<T>> fis = fpTree.MinePatternsContaining(item, getMinItemSetSupport(new ItemSet<T>() { item }));
        fpTree.RemoveFromLeaves(item);
        allItemSets.AddRange(fis);
    }

    return allItemSets;
}
/// <summary>
/// Walks the subtree rooted at <paramref name="node"/> depth-first and, for every
/// leaf whose item equals <paramref name="targetItem"/>, takes the item set returned
/// by <c>GetPath()</c>, stamps it with the leaf's count and the database size, and
/// adds it to <paramref name="selection"/> when the leaf count reaches
/// <c>getMinItemSetSupport(itemSet) * mDbSize</c>.
/// </summary>
/// <param name="node">Root of the subtree to search.</param>
/// <param name="selection">List that receives the accepted item sets.</param>
/// <param name="targetItem">Item a leaf must hold to be considered.</param>
/// <param name="getMinItemSetSupport">Handle returning the minimum support for an item set.</param>
protected void MinePatternsContaining(FPTreeNode<T> node, List<ItemSet<T>> selection, T targetItem, GetMinSupportHandle getMinItemSetSupport)
{
    if (!node.IsLeaf)
    {
        for (int i = 0; i < node.ChildCount; ++i)
        {
            MinePatternsContaining(node.GetChild(i), selection, targetItem, getMinItemSetSupport);
        }

        return;
    }

    // Static object.Equals tolerates a null item on the leaf, where calling
    // node.Item.Equals(...) directly would throw a NullReferenceException.
    if (!object.Equals(node.Item, targetItem))
    {
        return;
    }

    // Use the path item set directly; no placeholder instance is allocated first.
    ItemSet<T> fis = node.GetPath();
    fis.TransactionCount = node.Count;
    fis.DbSize = mDbSize;

    if (node.Count >= getMinItemSetSupport(fis) * mDbSize)
    {
        selection.Add(fis);
    }
}
/// <summary>
/// Finds the item sets that pass the support threshold within a single partition.
/// Starts from single-item sets built from <paramref name="domain"/>, then grows
/// them level by level by joining pairs of sets from the previous level.
/// </summary>
/// <param name="partition">The transactions of this partition.</param>
/// <param name="getMinItemSetSupport">
/// Handle returning the minimum support for an item set; the result is multiplied by
/// <c>partition.Count</c> and compared with the size of the set's transaction ID list.
/// </param>
/// <param name="domain">The items to consider.</param>
/// <returns>Every item set, across all levels, that passed the threshold.</returns>
protected ItemSets<T> GenerateLargeItemSets(List<Transaction<T>> partition, GetMinSupportHandle getMinItemSetSupport, IList<T> domain)
{
    // Level 1: one single-item set per domain entry, with the IDs of the
    // partition transactions that contain it.
    ItemSets<T> current = new ItemSets<T>();
    for (int d = 0; d < domain.Count; ++d)
    {
        ItemSet<T> single = new ItemSet<T>() { domain[d] };

        for (int t = 0; t < partition.Count; ++t)
        {
            Transaction<T> transaction = partition[t];
            if (transaction.ContainsItemSet(single))
            {
                single.TransactionIDList.Add(transaction.ID);
            }
        }

        if (single.TransactionIDList.Count >= getMinItemSetSupport(single) * partition.Count)
        {
            current.Add(single);
        }
    }

    int level = 1;

    ItemSets<T> result = new ItemSets<T>();
    result.AddRange(current);

    while (current.Count > 0)
    {
        ItemSets<T> next = new ItemSets<T>();

        // Self-join: try every ordered pair of distinct sets from the current level.
        for (int i = 0; i < current.Count; ++i)
        {
            for (int j = 0; j < current.Count; ++j)
            {
                if (i == j)
                {
                    continue;
                }

                // The two sets must agree on their first (level - 1) items.
                bool samePrefix = true;
                for (int pos = 0; samePrefix && pos < level - 1; ++pos)
                {
                    samePrefix = current[i][pos].CompareTo(current[j][pos]) == 0;
                }

                if (!samePrefix || !CanJoin(current[i], current[j][level - 1]))
                {
                    continue;
                }

                ItemSet<T> joined = Union(current[i], current[j]);
                if (joined.TransactionIDList.Count >= getMinItemSetSupport(joined) * partition.Count)
                {
                    next.Add(joined);
                }
            }
        }

        result.AddRange(next);
        current = next;
        level++;
    }

    return result;
}
/// <summary>
/// Two-phase, partition-based mining. Phase one gathers the item sets produced by
/// <c>GenerateLargeItemSets</c> for each partition into one candidate set. Phase two
/// re-reads every partition, accumulates each candidate's transaction count via
/// <c>GetCount</c>, and keeps the candidates whose <c>Support</c> reaches the
/// minimum returned by <paramref name="getMinItemSetSupport"/>.
/// </summary>
/// <param name="database">Transactions passed to <c>ReadInPartition</c>.</param>
/// <param name="getMinItemSetSupport">Handle returning the minimum support for an item set.</param>
/// <param name="domain">The items to consider.</param>
/// <param name="partitionCount">Number of partitions to read (indices 0 .. partitionCount - 1).</param>
/// <returns>The candidates that passed the final support check.</returns>
public virtual ItemSets<T> MinePatterns(IEnumerable<Transaction<T>> database, GetMinSupportHandle getMinItemSetSupport, IList<T> domain, int partitionCount)
{
    // Phase 1: pool the per-partition results.
    HashSet<ItemSet<T>> candidates = new HashSet<ItemSet<T>>();
    for (int p = 0; p < partitionCount; ++p)
    {
        List<Transaction<T>> partition = ReadInPartition(p, database);
        ItemSets<T> localSets = GenerateLargeItemSets(partition, getMinItemSetSupport, domain);
        foreach (ItemSet<T> localSet in localSets)
        {
            candidates.Add(localSet);
        }
    }

    // Phase 2: accumulate counts for every candidate across all partitions.
    int totalTransactions = 0;
    for (int p = 0; p < partitionCount; ++p)
    {
        List<Transaction<T>> partition = ReadInPartition(p, database);
        totalTransactions += partition.Count;

        foreach (ItemSet<T> candidate in candidates)
        {
            candidate.TransactionCount += GetCount(partition, candidate);
        }
    }

    foreach (ItemSet<T> candidate in candidates)
    {
        candidate.DbSize = totalTransactions;
    }

    // Keep only the candidates that meet their minimum support.
    ItemSets<T> accepted = new ItemSets<T>();
    foreach (ItemSet<T> candidate in candidates)
    {
        if (candidate.Support < getMinItemSetSupport(candidate))
        {
            continue;
        }

        accepted.Add(candidate);
    }

    return accepted;
}
/// <summary>
/// Level-wise mining driven by a caller-supplied counting delegate. Builds
/// single-item sets from <paramref name="domain"/>, has
/// <paramref name="updateItemSetSupport"/> fill in their support, keeps those that
/// meet the minimum, and then repeatedly joins pairs of kept sets into larger
/// candidates until a level yields no surviving set.
/// </summary>
/// <param name="updateItemSetSupport">Delegate invoked on each batch of candidates before their <c>Support</c> is read.</param>
/// <param name="getMinItemSetSupport">Handle returning the minimum support for an item set.</param>
/// <param name="domain">The items to consider.</param>
/// <returns>Every item set, across all levels, whose support met the minimum.</returns>
public virtual ItemSets<T> MinePatterns(GetCountHandle updateItemSetSupport, GetMinSupportHandle getMinItemSetSupport, IList<T> domain)
{
    ItemSets<T> current = new ItemSets<T>();

    // Level 1 candidates: one single-item set per domain entry.
    List<ItemSet<T>> singles = new List<ItemSet<T>>();
    for (int d = 0; d < domain.Count; ++d)
    {
        ItemSet<T> single = new ItemSet<T>() { domain[d] };
        single.TransactionCount = 0;
        singles.Add(single);
    }

    updateItemSetSupport(singles);

    foreach (ItemSet<T> single in singles)
    {
        if (single.Support >= getMinItemSetSupport(single))
        {
            current.Add(single);
        }
    }

    int level = 1;

    ItemSets<T> result = new ItemSets<T>();
    result.AddRange(current);

    while (current.Count > 0)
    {
        ItemSets<T> candidates = new ItemSets<T>();

        // Self-join: try every ordered pair of distinct sets from the current level.
        for (int i = 0; i < current.Count; ++i)
        {
            for (int j = 0; j < current.Count; ++j)
            {
                if (i == j)
                {
                    continue;
                }

                // The two sets must agree on their first (level - 1) items.
                bool samePrefix = true;
                for (int pos = 0; samePrefix && pos < level - 1; ++pos)
                {
                    samePrefix = current[i][pos].CompareTo(current[j][pos]) == 0;
                }

                if (!samePrefix || !CanJoin(current[i], current[j][level - 1]))
                {
                    continue;
                }

                // Extend a copy of the first set with the last item of the second.
                ItemSet<T> extended = current[i].Clone();
                extended.Add(current[j][level - 1]);
                candidates.Add(extended);
            }
        }

        updateItemSetSupport(candidates);

        List<ItemSet<T>> survivors = new List<ItemSet<T>>();
        foreach (ItemSet<T> candidate in candidates)
        {
            if (candidate.Support >= getMinItemSetSupport(candidate))
            {
                survivors.Add(candidate);
            }
        }

        result.AddRange(survivors);

        current.Clear();
        current.AddRange(survivors);
        level++;
    }

    return result;
}
/// <summary>
/// Convenience overload that mines <paramref name="database"/> directly. It wraps
/// the database in a counting delegate and forwards to the delegate-based
/// <c>MinePatterns</c> overload.
/// </summary>
/// <param name="database">Transactions to scan; enumerated once per invocation of the counting delegate.</param>
/// <param name="getMinItemSetSupport">Handle returning the minimum support for an item set.</param>
/// <param name="domain">The items to consider.</param>
/// <returns>The result of the delegate-based overload.</returns>
public virtual ItemSets<T> MinePatterns(IEnumerable<Transaction<T>> database, GetMinSupportHandle getMinItemSetSupport, IList<T> domain)
{
    GetCountHandle countAgainstDatabase = (candidates) =>
    {
        // Start every candidate from a zero count.
        foreach (ItemSet<T> candidate in candidates)
        {
            candidate.TransactionCount = 0;
        }

        // One pass over the database: bump each candidate contained in the
        // transaction and tally how many transactions were seen.
        int seen = 0;
        foreach (Transaction<T> transaction in database)
        {
            foreach (ItemSet<T> candidate in candidates)
            {
                if (transaction.ContainsItemSet(candidate))
                {
                    candidate.TransactionCount++;
                }
            }

            seen++;
        }

        // Record the database size on every candidate.
        foreach (ItemSet<T> candidate in candidates)
        {
            candidate.DbSize = seen;
        }
    };

    return MinePatterns(countAgainstDatabase, getMinItemSetSupport, domain);
}