/// <summary>
/// Asks every potentially-maximal pattern at <c>depth + 1</c> to settle its maximal status
/// and registers those that report <see cref="YesNoUnknown.Yes"/> with the frequent-pattern recorder.
/// </summary>
/// <param name="depth">Depth handed in by the caller; the patterns inspected live at <c>depth + 1</c>.</param>
private void DetermineMaximal(Depth depth)
{
    var inspectedDepth = depth + 1;
    foreach (var candidate in PatternsFrequent.GetPotentialMaximalAtDepth(inspectedDepth))
    {
        candidate.DetermineMaximal(inspectedDepth);
        if (candidate.IsMaximal != YesNoUnknown.Yes)
        {
            continue;
        }

        PatternsFrequent.AddMaximal(candidate);
    }
}
/// <summary>
/// Asks every frequent pattern at <c>depth + 1</c> to settle its closed status and registers
/// those that report <see cref="YesNoUnknown.Yes"/> with the frequent-pattern recorder.
/// </summary>
/// <param name="depth">Depth handed in by the caller; the patterns inspected live at <c>depth + 1</c>.</param>
private void DetermineClosed(Depth depth)
{
    var inspectedDepth = depth + 1;
    foreach (var frequent in PatternsFrequent.GetFrequentsAtDepth(inspectedDepth))
    {
        frequent.DetermineClosed(inspectedDepth);
        if (frequent.IsClosed != YesNoUnknown.Yes)
        {
            continue;
        }

        PatternsFrequent.AddClosed(frequent);
    }
}
/// <summary>
/// Recursively walks the subtree rooted at <paramref name="tn"/>, recording one 1-pattern
/// occurrence per visited node and one 2-pattern occurrence per parent/child edge.
/// </summary>
/// <param name="tn">Node to scan; its children (if any) are scanned recursively.</param>
/// <param name="maxDepth">Raised to the largest node depth encountered during the walk.</param>
void ScanP1P2(ITreeNode tn, ref Depth maxDepth)
{
    // Keep track of the deepest node seen so far.
    if (tn.Depth >= maxDepth)
    {
        maxDepth = tn.Depth;
    }

    var treeId = tn.Tree.TreeId;

    // 1-pattern: the node's own symbol followed by a backtrack.
    var singlePreorder = new[] { tn.Symbol, MiningParams.BackTrackSymbol };
    var singleKey = singlePreorder.ToPreorderString(MiningParams.Separator);
    if (!OnePatterns.ContainsKey(singleKey))
    {
        var singlePattern = PatternTree.Create(singlePreorder, false, MiningParams);
        PatternsExtended.AddPattern(singlePattern);
        OnePatterns.Add(singlePattern.PreorderString, singlePattern);
    }

    var singleOccurrence = OccInduced.Create(treeId, tn.Depth, new[] { tn.PreorderIndex });
    OnePatterns[singleKey].AddOccurrence(singleOccurrence);

    if (tn.Children == null)
    {
        return;
    }

    // Scan for 2-patterns, and each child implies an existence of right-most 2-occurrence.
    foreach (var child in tn.Children)
    {
        var pairPreorder = new[]
        {
            tn.Symbol,
            child.Symbol,
            MiningParams.BackTrackSymbol,
            MiningParams.BackTrackSymbol
        };
        var pairKey = pairPreorder.ToPreorderString(MiningParams.Separator);
        if (!TwoPatterns.ContainsKey(pairKey))
        {
            var pairPattern = PatternTree.Create(pairPreorder, true, MiningParams);
            PatternsExtended.AddPattern(pairPattern);
            TwoPatterns.Add(pairPattern.PreorderString, pairPattern);
        }

        var pairOccurrence = OccInduced.Create(treeId, tn.Depth, new[] { tn.PreorderIndex, child.PreorderIndex });
        if (child.IsLeaf)
        {
            // An edge that ends in a leaf is flagged as unable to connect.
            pairOccurrence.AbleToConnect = false;
        }

        TwoPatterns[pairKey].AddOccurrence(pairOccurrence);

        ScanP1P2(child, ref maxDepth);
    }
}
/// <summary>
/// Scans every tree for 1- and 2-patterns, tells the frequent-pattern recorder the deepest
/// node depth found, evaluates frequency, and returns that depth.
/// </summary>
/// <param name="treeSet">Trees to scan; each tree's root is handed to <c>ScanP1P2</c>.</param>
/// <returns>The largest node depth seen, or -1 when <paramref name="treeSet"/> yields no trees.</returns>
protected override int GenerateF1F2(IEnumerable<ITextTree> treeSet)
{
    Depth deepest = -1;

    foreach (var tree in treeSet)
    {
        ScanP1P2(tree.Root, ref deepest);
    }

    PatternsFrequent.SetDepth(deepest);
    EvaluateFrequency();

    return deepest;
}
/// <summary>
/// Sums <c>RootOccurrenceCount</c> over every recorded depth strictly smaller than
/// <paramref name="depth"/>.
/// </summary>
/// <param name="depth">Exclusive upper bound of the depths that are summed.</param>
/// <param name="includingRoot">When false, depth 0 is left out of the sum.</param>
/// <returns>The accumulated root-occurrence count.</returns>
internal int RootSupportAbove(Depth depth, bool includingRoot)
{
    // Depth 0 only takes part when the caller asks for it.
    var lowestDepth = includingRoot ? 0 : 1;
    var total = 0;

    for (var level = depth - 1; level >= lowestDepth; level--)
    {
        if (ContainsDepth(level))
        {
            total += this[level].RootOccurrenceCount;
        }
    }

    return total;
}
/// <summary>
/// Runs the whole mining procedure once: canonicalizes and indexes the trees, generates the
/// 1-/2-patterns, then alternates <c>Combine</c> and <c>Connect</c> from the deepest level upwards.
/// </summary>
/// <param name="treeSet">Trees to mine.</param>
/// <returns>The results collected after mining, together with the measured time.</returns>
/// <exception cref="InvalidOperationException">Mining has already finished or is currently running.</exception>
/// <exception cref="ArgumentNullException"><paramref name="treeSet"/> is null.</exception>
internal MiningResults Mine(IList<ITextTree> treeSet)
{
    if (taskFinished)
    {
        throw new InvalidOperationException("Mining has been performed, check the result.");
    }

    if (IsMining)
    {
        throw new InvalidOperationException("It has been mining.");
    }

    if (treeSet == null)
    {
        throw new ArgumentNullException("treeSet");
    }

    try
    {
        IsMining = true;

        Canonicalize(treeSet);
        BuildPreorderIndex(treeSet);

        // Timing starts after the preprocessing above.
        var timeCounter = Stopwatch.StartNew();

        MaxDepth = GenerateF1F2(treeSet);

        // Combine at each level, then connect at the level above it; the final
        // iteration therefore calls Connect(-1).
        for (var level = MaxDepth - 1; level >= 0; level--)
        {
            Combine(level);
            Connect(level - 1);
        }

        timeCounter.Stop();
        return CollectResults(timeCounter);
    }
    finally
    {
        // Set even when mining throws, so an instance cannot be reused after a run.
        IsMining = false;
        taskFinished = true;
    }
}
/// <summary>
/// Splits the given patterns into related groups and starts a <c>Traversal</c> from every
/// pattern of a group paired with every index of that same group.
/// </summary>
/// <param name="rDi">Patterns to be grouped and traversed.</param>
/// <param name="depth">Depth forwarded unchanged to <c>Traversal</c>.</param>
private void StartTraversal(IEnumerable<PatternTree> rDi, Depth depth)
{
    foreach (var related in DevideToRelatedGroups(rDi))
    {
        if (related.Count >= 1)
        {
            foreach (var start in related)
            {
                for (var partnerIndex = 0; partnerIndex < related.Count; partnerIndex++)
                {
                    Traversal(start, partnerIndex, related, depth);
                }
            }
        }
    }
}
/// <summary>
/// Connects each connectable 2-pattern at <paramref name="depth"/> with the patterns at
/// <c>depth + 1</c> whose root symbol equals the 2-pattern's second symbol, then runs the
/// optional closed/maximal determination and the post-connection pruning.
/// </summary>
/// <param name="depth">Depth of the connecting 2-patterns.</param>
protected override void Connect(Depth depth)
{
    var lowerDepth = depth + 1;
    var connectors = PatternsFrequent.GetConnectableAtDepth(depth);
    var attachable = PatternsFrequent.GetToBeConnectableAtDepth(lowerDepth);

    foreach (var connector in connectors)
    {
        foreach (var target in SelectPatternsOfSameRoot(connector.SecondSymbol, attachable, lowerDepth))
        {
            var childKey = connector.ConnectPreorderRepresentation(target).ToPreorderString(MiningParams.Separator);

            // Skip children that were generated before, and pairs without a new occurrence.
            var shouldConnect = !PatternsExtended.AlreadyExtended(childKey)
                                && connector.HasNewConnectOccurrenceAtDepth(target, depth);
            if (shouldConnect)
            {
                ConnectTwoPatterns(connector, target, depth);
            }
        }
    }

    if (MiningParams.MineClosed)
    {
        DetermineClosed(depth);
    }

    if (MiningParams.MineMaximal)
    {
        DetermineMaximal(depth);
    }

    Pruner.PruneAfterConnection(PatternsFrequent, MiningParams, depth);
}
/// <summary>
/// Counts the distinct tree ids that have occurrences at any recorded depth strictly smaller
/// than <paramref name="depth"/>.
/// </summary>
/// <param name="depth">Exclusive upper bound of the depths that are inspected.</param>
/// <param name="includingRoot">When false, depth 0 is left out.</param>
/// <returns>The number of distinct tree ids found.</returns>
internal int TransactionSupportAbove(Depth depth, bool includingRoot)
{
    // Depth 0 only takes part when the caller asks for it.
    var lowestDepth = includingRoot ? 0 : 1;
    var seenTrees = new HashSet<TreeId>();

    for (var level = depth - 1; level >= lowestDepth; level--)
    {
        if (!ContainsDepth(level))
        {
            continue;
        }

        foreach (TreeOccSet treeOccs in this[level].GetTreeSet())
        {
            seenTrees.Add(treeOccs.TreeId);
        }
    }

    return seenTrees.Count;
}
/// <summary>
/// Builds an occurrence from its tree id, depth and preorder indices. The index list is
/// wrapped in a read-only view (not copied).
/// </summary>
/// <param name="treeId">Identifier of the tree holding the occurrence; must be non-empty.</param>
/// <param name="depth">Depth of the occurrence; must be at least 0.</param>
/// <param name="preorderCode">Preorder indices of the occurrence; must contain at least one entry.</param>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="treeId"/> is null or empty, or <paramref name="depth"/> is negative.</exception>
/// <exception cref="ArgumentNullException"><paramref name="preorderCode"/> is null or empty.</exception>
OccInduced(TreeId treeId, Depth depth, IList<int> preorderCode)
{
    if (string.IsNullOrEmpty(treeId))
    {
        throw new ArgumentOutOfRangeException("treeId");
    }

    if (depth < 0)
    {
        throw new ArgumentOutOfRangeException("depth", "Depth of an occurrence should be larger than or equal to 0.");
    }

    var hasIndices = preorderCode != null && preorderCode.Count > 0;
    if (!hasIndices)
    {
        throw new ArgumentNullException("preorderCode");
    }

    this.treeId = treeId;
    this.depth = depth;
    this.preorderCode = new ReadOnlyCollection<int>(preorderCode);

    // Only two-index occurrences start out connectable.
    AbleToConnect = (PreorderCode.Count == 2);

    // An occurrence whose first preorder index is 0 cannot be connected to.
    AbleToBeConnected = preorderCode[0] > 0;
}
/// <summary>
/// Forwards to <c>IsInducedSuperPattern</c> using the configured backtrack symbol.
/// </summary>
/// <param name="largerPt">Pattern this one is checked against.</param>
/// <param name="depth">Not used by this method.</param>
/// <returns>Whatever <c>IsInducedSuperPattern</c> reports.</returns>
internal bool IsSuperPattern(PatternTree largerPt, Depth depth)
{
    var isSuper = this.IsInducedSuperPattern(largerPt, MiningParams.BackTrackSymbol);
    return isSuper;
}
/// <summary>
/// Factory for <c>OccInduced</c>; the arguments are passed straight to its constructor.
/// </summary>
/// <param name="treeId">Identifier of the tree holding the occurrence.</param>
/// <param name="depth">Depth of the occurrence.</param>
/// <param name="preorderCode">Preorder indices of the occurrence.</param>
/// <returns>The new occurrence, exposed as <c>IOccurrence</c>.</returns>
internal static IOccurrence Create(TreeId treeId, Depth depth, IList<int> preorderCode)
{
    IOccurrence occurrence = new OccInduced(treeId, depth, preorderCode);
    return occurrence;
}
/// <summary>
/// Creates an occurrence set for one depth with a root-occurrence count of zero.
/// </summary>
/// <param name="depth">Depth this set is stored under.</param>
internal DepthOccSet(Depth depth)
{
    RootOccurrenceCount = 0;
    this.depth = depth;
}
/// <summary>
/// Picks the patterns whose first symbol equals <paramref name="symbol"/>, that are flagged
/// as able to be connected, and that have occurrences at <paramref name="depth"/>.
/// </summary>
/// <param name="symbol">Root symbol to match.</param>
/// <param name="fDi">Candidate patterns; null or empty yields an empty result.</param>
/// <param name="depth">Depth the selected patterns must contain.</param>
/// <returns>A new list with the matching patterns, in the order they appear in <paramref name="fDi"/>.</returns>
private IEnumerable<PatternTree> SelectPatternsOfSameRoot(NodeSymbol symbol, ICollection<PatternTree> fDi, Depth depth)
{
    var selected = new List<PatternTree>();

    if (fDi != null && fDi.Count > 0)
    {
        foreach (var pt in fDi)
        {
            if (pt.FirstSymbol == symbol && pt.AbleToBeConnected && pt.ContainsDepth(depth))
            {
                selected.Add(pt);
            }
        }
    }

    return selected;
}
/// <summary>
/// Among the fan-out-1 frequent patterns at <paramref name="depth"/>, finds patterns that have a
/// larger pattern with the same first/second symbols and identical supports, and hands their
/// preorder strings to <c>RemoveRedundantForClosed</c>. Does nothing unless only closed and/or
/// maximal patterns are being mined.
/// </summary>
/// <param name="fRecorder">Recorder that supplies the patterns and receives the removals.</param>
/// <param name="param">Mining parameters (mining mode, support type, thresholds).</param>
/// <param name="depth">Depth whose patterns are examined.</param>
private static void PruneCannotBeClosed(PatternRecorderFrequent fRecorder, MiningParams param, Depth depth)
{
    var closedOrMaximalOnly = !param.MineFrequent && (param.MineClosed || param.MineMaximal);
    if (!closedOrMaximalOnly)
    {
        return;
    }

    var candidates = fRecorder.GetFanout1FrequentsAtDepth(depth);
    candidates.Sort();

    // Bucket the non-2-patterns by "first,second" symbol pair.
    var buckets = new Dictionary<NodeSymbol, List<PatternTree>>();
    foreach (var pattern in candidates)
    {
        if (pattern.Is2Pattern)
        {
            continue;
        }

        var key = pattern.FirstSymbol + "," + pattern.SecondSymbol;
        List<PatternTree> bucket;
        if (!buckets.TryGetValue(key, out bucket))
        {
            bucket = new List<PatternTree>();
            buckets.Add(key, bucket);
        }

        bucket.Add(pattern);
    }

    foreach (var entry in buckets)
    {
        var members = entry.Value;
        var redundantKeys = new HashSet<string>();

        for (var i = 0; i < members.Count; i++)
        {
            var smaller = members[i];

            for (var j = 0; j < members.Count; j++)
            {
                if (j == i)
                {
                    continue;
                }

                var larger = members[j];

                // Only a strictly larger pattern with equal supports can make 'smaller' redundant.
                var comparable = smaller.Size < larger.Size
                                 && smaller.TransactionSupport == larger.TransactionSupport
                                 && smaller.RootSupport == larger.RootSupport;
                if (!comparable)
                {
                    continue;
                }

                if (!smaller.IsSuperPattern(larger, depth))
                {
                    continue;
                }

                var maxDif = (param.SupportType == SupportType.Transaction)
                    ? param.ThresholdTransaction
                    : param.ThresholdRoot;
                if (smaller.NumberOfRightMostOcc - larger.NumberOfRightMostOcc >= maxDif)
                {
                    continue;
                }

                // One qualifying larger pattern is enough; stop looking for more.
                redundantKeys.Add(smaller.PreorderString);
                break;
            }
        }

        fRecorder.RemoveRedundantForClosed(redundantKeys);
        Debug.WriteLine("Depth:{0} RemoveRedundantForClosed Number={1}", depth, redundantKeys.Count);
    }
}
/// <summary>
/// Clears this pattern's extension flags when the support gathered at depths smaller than
/// <c>depth + 1</c> falls below the configured threshold(s).
/// </summary>
/// <remarks>
/// <c>AbleToCombine</c>/<c>AbleToConnect</c> are judged on support that includes depth 0;
/// <c>AbleToBeConnected</c> is judged on support that excludes depth 0. Under
/// <see cref="SupportType.Hybrid"/> both the transaction and the root threshold must be met.
/// Fix: the hybrid <c>AbleToBeConnected</c> check used to compare the root support against the
/// transaction threshold; it now uses the transaction support, like every other branch.
/// </remarks>
/// <param name="param">Mining parameters providing the support type and thresholds.</param>
/// <param name="depth">Depth just connected; support is taken from depths below <c>depth + 1</c>.</param>
/// <exception cref="ArgumentOutOfRangeException">The support type is not one of the handled values.</exception>
internal void PruneAfterConnection(MiningParams param, Depth depth)
{
    var upperBound = depth + 1;

    switch (param.SupportType)
    {
        case SupportType.Transaction:
        {
            if (AbleToCombine || AbleToConnect)
            {
                if (TransactionSupportAbove(upperBound, true) < param.ThresholdTransaction)
                {
                    AbleToCombine = false;
                    AbleToConnect = false;
                }
            }

            if (AbleToBeConnected)
            {
                if (TransactionSupportAbove(upperBound, false) < param.ThresholdTransaction)
                {
                    AbleToBeConnected = false;
                }
            }
        }
        break;

        case SupportType.RootOccurrence:
        {
            if (AbleToCombine || AbleToConnect)
            {
                if (RootSupportAbove(upperBound, true) < param.ThresholdRoot)
                {
                    AbleToCombine = false;
                    AbleToConnect = false;
                }
            }

            if (AbleToBeConnected)
            {
                if (RootSupportAbove(upperBound, false) < param.ThresholdRoot)
                {
                    AbleToBeConnected = false;
                }
            }
        }
        break;

        case SupportType.Hybrid:
        {
            if (AbleToCombine || AbleToConnect)
            {
                // Root support is only computed when the transaction threshold is met.
                var infrequent = TransactionSupportAbove(upperBound, true) < param.ThresholdTransaction
                                 || RootSupportAbove(upperBound, true) < param.ThresholdRoot;
                if (infrequent)
                {
                    AbleToCombine = false;
                    AbleToConnect = false;
                }
            }

            if (AbleToBeConnected)
            {
                // Transaction support (not root support) is what the transaction threshold applies to.
                var infrequent = TransactionSupportAbove(upperBound, false) < param.ThresholdTransaction
                                 || RootSupportAbove(upperBound, false) < param.ThresholdRoot;
                if (infrequent)
                {
                    AbleToBeConnected = false;
                }
            }
        }
        break;

        default:
            throw new ArgumentOutOfRangeException();
    }
}
/// <summary>
/// Combine step for a single depth; concrete subclasses supply the implementation.
/// </summary>
/// <remarks>
/// NOTE(review): appears to be the hook that <c>Mine</c> invokes for each depth, right before
/// <c>Connect</c> - confirm that both live in the same class.
/// </remarks>
/// <param name="depth">Depth to run the combine step for.</param>
protected abstract void Combine(Depth depth);
/// <summary>
/// Returns the recorded closed patterns that have occurrences at <paramref name="depth"/>.
/// </summary>
/// <param name="depth">Depth the returned patterns must contain.</param>
/// <returns>A new array with the matching closed patterns.</returns>
internal PatternTree[] GetClosedAtDepth(Depth depth)
{
    var closedAtDepth =
        from pt in Closeds.Values
        where pt.ContainsDepth(depth)
        select pt;

    return closedAtDepth.ToArray();
}
/// <summary>
/// Tells whether connecting the 2-pattern <paramref name="p2"/> (at <paramref name="depth"/>) with
/// <paramref name="pt"/> (at <c>depth + 1</c>) would produce at least one occurrence, i.e. whether
/// some right-most index of a <paramref name="p2"/> occurrence is a root index of
/// <paramref name="pt"/> in the same tree.
/// </summary>
/// <param name="p2">Connecting pattern; must be a 2-pattern.</param>
/// <param name="pt">Pattern to be connected below <paramref name="p2"/>.</param>
/// <param name="depth">Depth of the <paramref name="p2"/> occurrences considered.</param>
/// <returns>True as soon as one matching occurrence pair is found; otherwise false.</returns>
/// <exception cref="ArgumentNullException"><paramref name="p2"/> or <paramref name="pt"/> is null.</exception>
/// <exception cref="ArgumentException"><paramref name="p2"/> is not a 2-pattern.</exception>
internal static bool HasNewConnectOccurrenceAtDepth(this PatternTree p2, PatternTree pt, Depth depth)
{
    if (p2 == null)
    {
        throw new ArgumentNullException("p2");
    }

    if (!p2.Is2Pattern)
    {
        throw new ArgumentException("The connect pattern must be a 2-pattern.");
    }

    if (pt == null)
    {
        throw new ArgumentNullException("pt");
    }

    // The leaf symbol of p2 has to be the root symbol of pt.
    if (p2.SecondSymbol != pt.FirstSymbol)
    {
        return false;
    }

    var upperDepth = depth;
    var lowerDepth = upperDepth + 1;
    var bothPresent = p2.ContainsDepth(upperDepth) && pt.ContainsDepth(lowerDepth);
    if (!bothPresent)
    {
        return false;
    }

    // For every tree that contains p2 at the upper depth...
    foreach (TreeOccSet treeOccs in p2[upperDepth].GetTreeSet())
    {
        if (!pt.ContainsTreeAtDepth(lowerDepth, treeOccs.TreeId))
        {
            continue;
        }

        // ...and for every root occurrence in that tree, check its right-most occurrences.
        foreach (RootOcc rootOcc in treeOccs.GetRootSet())
        {
            foreach (IOccurrence leafOcc in rootOcc.GetRightMostSet())
            {
                // A leaf of p2 that is also a root of pt means a new pattern can be extended.
                if (pt[lowerDepth][treeOccs.TreeId].ContainsRootIndex(leafOcc.RightMostIndex))
                {
                    return true;
                }
            }
        }
    }

    return false;
}
/// <summary>
/// Combines <paramref name="xPattern"/> with <c>group[yIndex]</c> and, when that yields a pattern,
/// recursively tries to combine the result with every member of <paramref name="group"/>.
/// </summary>
/// <param name="xPattern">Left-hand pattern of the combination.</param>
/// <param name="yIndex">Index into <paramref name="group"/> of the right-hand pattern.</param>
/// <param name="group">Patterns that may be combined with one another.</param>
/// <param name="depth">Depth at which occurrences are combined.</param>
private void Traversal(PatternTree xPattern, int yIndex, IList<PatternTree> group, Depth depth)
{
    var partner = group[yIndex];
    var combinedKey = xPattern.CombinePreorderRepresentation(partner).ToPreorderString(MiningParams.Separator);

    PatternTree combined;
    if (PatternsExtended.AlreadyExtended(combinedKey))
    {
        // Already generated earlier: reuse the recorded pattern.
        combined = PatternsFrequent.GetPatternAtDepth(combinedKey, depth);
    }
    else if (xPattern.HasNewCombineOccurrenceAtDepth(partner, depth))
    {
        combined = Combine2Patterns(xPattern, partner, depth);
    }
    else
    {
        return;
    }

    if (combined == null)
    {
        return;
    }

    for (var next = 0; next < group.Count; next++)
    {
        Traversal(combined, next, group, depth);
    }
}
/// <summary>
/// Connect step for a single depth; concrete subclasses supply the implementation.
/// </summary>
/// <remarks>
/// NOTE(review): appears to be the hook that <c>Mine</c> invokes right after <c>Combine</c>,
/// with the depth already decremented (so it can receive -1) - confirm against the caller.
/// </remarks>
/// <param name="depth">Depth to run the connect step for.</param>
protected abstract void Connect(Depth depth);
/// <summary>
/// Lets every frequent pattern at <c>depth + 1</c> re-evaluate its extension flags, then asks
/// the recorder to remove the patterns at <c>depth + 1</c> that cannot be extended.
/// </summary>
/// <param name="fRecorder">Recorder that supplies the patterns and performs the removal.</param>
/// <param name="param">Mining parameters forwarded to each pattern.</param>
/// <param name="depth">Depth that was just connected.</param>
private static void PruneCannotBeExtended(PatternRecorderFrequent fRecorder, MiningParams param, Depth depth)
{
    var lowerDepth = depth + 1;

    foreach (var pattern in fRecorder.GetFrequentsAtDepth(lowerDepth))
    {
        pattern.PruneAfterConnection(param, depth);
    }

    fRecorder.RemoveCannotBeExtended(lowerDepth);
}
/// <summary>
/// Creates a root occurrence record from its tree id, depth and root index.
/// </summary>
/// <param name="treeId">Identifier of the tree the root belongs to.</param>
/// <param name="depth">Depth stored for this root occurrence.</param>
/// <param name="rootIndex">Preorder index stored as the root of the occurrence.</param>
public RootOcc(TreeId treeId, Depth depth, PreorderIndex rootIndex)
{
    this.rootIndex = rootIndex;
    this.depth = depth;
    this.treeId = treeId;
}
/// <summary>
/// Tells whether an occurrence set is stored under <paramref name="depth"/>.
/// </summary>
/// <param name="depth">Depth to look up.</param>
/// <returns>True when <c>DepthOccSet</c> has an entry for that depth.</returns>
internal bool ContainsDepth(Depth depth)
{
    var hasDepth = DepthOccSet.ContainsKey(depth);
    return hasDepth;
}
/// <summary>
/// Tells whether the occurrence set stored under <paramref name="depth"/> exists and contains
/// the tree <paramref name="treeId"/>.
/// </summary>
/// <param name="depth">Depth to look up.</param>
/// <param name="treeId">Tree identifier to look for at that depth.</param>
/// <returns>False when the depth is absent; otherwise the result of <c>ContainsTree</c>.</returns>
internal bool ContainsTreeAtDepth(Depth depth, TreeId treeId)
{
    if (!DepthOccSet.ContainsKey(depth))
    {
        return false;
    }

    return DepthOccSet[depth].ContainsTree(treeId);
}
/// <summary>
/// Creates the pattern obtained by connecting the 2-pattern <paramref name="f2"/> with
/// <paramref name="fpt"/>, fills in its occurrences for every connect depth from
/// <paramref name="depth"/> down to 0, and records it as frequent when it qualifies.
/// </summary>
/// <param name="f2">Connecting pattern; its size must be 2.</param>
/// <param name="fpt">Pattern attached below <paramref name="f2"/>.</param>
/// <param name="depth">Highest connect depth that is examined.</param>
/// <exception cref="InvalidOperationException"><paramref name="f2"/> is not of size 2.</exception>
private void ConnectTwoPatterns(PatternTree f2, PatternTree fpt, Depth depth)
{
    if (f2.Size != 2)
    {
        throw new InvalidOperationException("The connect pattern must be 2-pattern.");
    }

    var child = PatternTree.Create(f2.ConnectPreorderRepresentation(fpt), true, MiningParams);
    PatternsExtended.AddPattern(child);

    for (var connectDepth = depth; connectDepth >= 0; connectDepth--)
    {
        // fpt occurrences sit one level below the f2 occurrences they attach to.
        var attachDepth = connectDepth + 1;
        if (!f2.ContainsDepth(connectDepth) || !fpt.ContainsDepth(attachDepth))
        {
            continue;
        }

        foreach (TreeOccSet treeOccs in f2[connectDepth].GetTreeSet())
        {
            if (!fpt.ContainsTreeAtDepth(attachDepth, treeOccs.TreeId))
            {
                continue;
            }

            foreach (RootOcc root in treeOccs.GetRootSet())
            {
                foreach (IOccurrence f2Occ in root.GetRightMostSet())
                {
                    // The second node of the f2 occurrence must be a root of fpt in this tree.
                    var attachRoots = fpt[attachDepth][treeOccs.TreeId];
                    if (!attachRoots.RootSet.ContainsKey(f2Occ.SecondIndex))
                    {
                        continue;
                    }

                    child.AddOccurrence(f2Occ.Connect(attachRoots[f2Occ.SecondIndex].FirstOcc));
                }
            }
        }
    }

    if (child.IsFrequent)
    {
        PatternsFrequent.AddFrequentPattern(child);
        child.Father = f2;
        child.Mother = fpt;
        f2.CheckMatch(child);
        fpt.CheckMatch(child);
    }
}
/// <summary>
/// Connect step placeholder: this implementation always throws.
/// </summary>
/// <param name="depth">Ignored.</param>
/// <exception cref="NotImplementedException">Always thrown.</exception>
protected override void Connect(Depth depth)
{
    throw new NotImplementedException();
}
/// <summary>
/// Runs the pruning passes that follow a connect step: first the closed-pattern pruning
/// (only when closed and/or maximal patterns are mined without plain frequent mining), then
/// the pruning of patterns that cannot be extended.
/// </summary>
/// <param name="fRecorder">Recorder holding the frequent patterns.</param>
/// <param name="param">Mining parameters.</param>
/// <param name="depth">Depth that was just connected.</param>
internal static void PruneAfterConnection(PatternRecorderFrequent fRecorder, MiningParams param, Depth depth)
{
    var closedOrMaximalOnly = !param.MineFrequent && (param.MineClosed || param.MineMaximal);
    if (closedOrMaximalOnly)
    {
        PruneCannotBeClosed(fRecorder, param, depth);
    }

    PruneCannotBeExtended(fRecorder, param, depth);
}
/// <summary>
/// Tells whether combining <paramref name="xPattern"/> with <paramref name="yPattern"/> at
/// <paramref name="depth"/> would produce at least one occurrence: both must share a root
/// occurrence in some tree, and a right-most occurrence of <paramref name="yPattern"/> must have
/// a second index greater than the right-most index of <paramref name="xPattern"/>'s first occurrence.
/// </summary>
/// <param name="xPattern">Left-hand pattern.</param>
/// <param name="yPattern">Right-hand pattern.</param>
/// <param name="depth">Depth whose occurrences are compared.</param>
/// <returns>True as soon as one qualifying pair is found; otherwise false.</returns>
/// <exception cref="ArgumentNullException"><paramref name="xPattern"/> or <paramref name="yPattern"/> is null.</exception>
internal static bool HasNewCombineOccurrenceAtDepth(this PatternTree xPattern, PatternTree yPattern, Depth depth)
{
    if (xPattern == null)
    {
        throw new ArgumentNullException("xPattern");
    }

    if (yPattern == null)
    {
        throw new ArgumentNullException("yPattern");
    }

    // Patterns can only be combined when they share the root symbol.
    if (xPattern.FirstSymbol != yPattern.FirstSymbol)
    {
        return false;
    }

    var bothPresent = xPattern.ContainsDepth(depth) && yPattern.ContainsDepth(depth);
    if (!bothPresent)
    {
        return false;
    }

    foreach (TreeOccSet xTree in xPattern[depth].GetTreeSet())
    {
        if (!yPattern.ContainsTreeAtDepth(depth, xTree.TreeId))
        {
            continue;
        }

        foreach (RootOcc xRoot in xTree.GetRootSet())
        {
            var yTree = yPattern[depth][xTree.TreeId];
            if (!yTree.ContainsRootIndex(xRoot.RootIndex))
            {
                continue;
            }

            foreach (IOccurrence yOcc in yTree[xRoot.RootIndex].GetRightMostSet())
            {
                // The y branch must start to the right of x's right-most node.
                if (xRoot.FirstOcc.RightMostIndex < yOcc.SecondIndex)
                {
                    return true;
                }
            }
        }
    }

    return false;
}
/// <summary>
/// Returns the frequent 2-patterns that are flagged as able to connect and that have
/// occurrences at <paramref name="depth"/>.
/// </summary>
/// <param name="depth">Depth the returned patterns must contain.</param>
/// <returns>A new list with the matching 2-patterns.</returns>
internal List<PatternTree> GetConnectableAtDepth(Depth depth)
{
    var connectable =
        from f2 in Frequent2Pts.Values
        where f2.AbleToConnect && f2.ContainsDepth(depth)
        select f2;

    return connectable.ToList();
}