예제 #1
0
        /// <summary>
        /// Asks every potentially maximal pattern at <c>depth + 1</c> to settle its maximal status
        /// and registers the ones that report <c>YesNoUnknown.Yes</c>.
        /// </summary>
        /// <param name="depth">Reference depth; the candidates are fetched and evaluated at <c>depth + 1</c>.</param>
        private void DetermineMaximal(Depth depth)
        {
            var evaluatedDepth = depth + 1;

            foreach (var candidate in PatternsFrequent.GetPotentialMaximalAtDepth(evaluatedDepth))
            {
                candidate.DetermineMaximal(evaluatedDepth);

                if (candidate.IsMaximal != YesNoUnknown.Yes)
                {
                    continue;
                }

                PatternsFrequent.AddMaximal(candidate);
            }
        }
예제 #2
0
        /// <summary>
        /// Asks every frequent pattern at <c>depth + 1</c> to settle its closed status
        /// and registers the ones that report <c>YesNoUnknown.Yes</c>.
        /// </summary>
        /// <param name="depth">Reference depth; the patterns are fetched and evaluated at <c>depth + 1</c>.</param>
        private void DetermineClosed(Depth depth)
        {
            var evaluatedDepth = depth + 1;

            foreach (var frequent in PatternsFrequent.GetFrequentsAtDepth(evaluatedDepth))
            {
                frequent.DetermineClosed(evaluatedDepth);

                if (frequent.IsClosed != YesNoUnknown.Yes)
                {
                    continue;
                }

                PatternsFrequent.AddClosed(frequent);
            }
        }
예제 #3
0
        /// <summary>
        /// Recursively scans the subtree rooted at <paramref name="tn"/>. For every visited node an
        /// occurrence is added to its 1-pattern, and for every parent/child pair an occurrence is added
        /// to the corresponding 2-pattern. Patterns seen for the first time are created and registered
        /// in <c>PatternsExtended</c> and in <c>OnePatterns</c> / <c>TwoPatterns</c>.
        /// </summary>
        /// <param name="tn">Node to scan; its children (if any) are scanned recursively.</param>
        /// <param name="maxDepth">Raised to <c>tn.Depth</c> whenever it is not greater than that depth.</param>
        void ScanP1P2(ITreeNode tn, ref Depth maxDepth)
        {
            if (maxDepth <= tn.Depth)
            {
                maxDepth = tn.Depth;
            }

            var ownerTreeId = tn.Tree.TreeId;

            // --- 1-pattern: the node itself followed by a back-track symbol.
            var nodeCode = new[] { tn.Symbol, MiningParams.BackTrackSymbol };
            var nodeKey  = nodeCode.ToPreorderString(MiningParams.Separator);

            if (!OnePatterns.ContainsKey(nodeKey))
            {
                var nodePattern = PatternTree.Create(nodeCode, false, MiningParams);
                PatternsExtended.AddPattern(nodePattern);
                OnePatterns.Add(nodePattern.PreorderString, nodePattern);
            }

            var registeredNodePattern = OnePatterns[nodeKey];
            registeredNodePattern.AddOccurrence(OccInduced.Create(ownerTreeId, tn.Depth, new[] { tn.PreorderIndex }));

            if (tn.Children == null)
            {
                return;
            }

            // --- 2-patterns: each child implies an existence of a right-most 2-occurrence.
            foreach (var childNode in tn.Children)
            {
                var edgeCode = new[] { tn.Symbol, childNode.Symbol, MiningParams.BackTrackSymbol, MiningParams.BackTrackSymbol };
                var edgeKey  = edgeCode.ToPreorderString(MiningParams.Separator);

                if (!TwoPatterns.ContainsKey(edgeKey))
                {
                    var edgePattern = PatternTree.Create(edgeCode, true, MiningParams);
                    PatternsExtended.AddPattern(edgePattern);
                    TwoPatterns.Add(edgePattern.PreorderString, edgePattern);
                }

                var edgeOccurrence = OccInduced.Create(ownerTreeId, tn.Depth, new[] { tn.PreorderIndex, childNode.PreorderIndex });

                // An occurrence that ends in a leaf is flagged as not able to connect.
                if (childNode.IsLeaf)
                {
                    edgeOccurrence.AbleToConnect = false;
                }

                TwoPatterns[edgeKey].AddOccurrence(edgeOccurrence);

                ScanP1P2(childNode, ref maxDepth);
            }
        }
예제 #4
0
        /// <summary>
        /// Scans every tree of <paramref name="treeSet"/> from its root to collect 1- and 2-pattern
        /// occurrences, stores the largest node depth seen in <c>PatternsFrequent</c>, and then
        /// runs <c>EvaluateFrequency</c>.
        /// </summary>
        /// <param name="treeSet">Trees to scan.</param>
        /// <returns>The largest node depth found, or -1 when no node was scanned.</returns>
        protected override int GenerateF1F2(IEnumerable <ITextTree> treeSet)
        {
            Depth deepest = -1;

            foreach (var textTree in treeSet)
            {
                ScanP1P2(textTree.Root, ref deepest);
            }

            PatternsFrequent.SetDepth(deepest);
            EvaluateFrequency();

            return deepest;
        }
예제 #5
0
        /// <summary>
        /// Sums <c>RootOccurrenceCount</c> over every recorded depth strictly smaller than
        /// <paramref name="depth"/>.
        /// </summary>
        /// <param name="depth">Exclusive upper bound of the depths that are summed.</param>
        /// <param name="includingRoot">When false, depth 0 is left out of the sum.</param>
        /// <returns>The accumulated root occurrence count.</returns>
        internal int RootSupportAbove(Depth depth, bool includingRoot)
        {
            // Depth 0 is only visited when the caller asks for the root level to be included.
            var lowestLevel = includingRoot ? 0 : 1;
            var total       = 0;

            for (var level = depth - 1; level >= lowestLevel; level--)
            {
                if (!ContainsDepth(level))
                {
                    continue;
                }

                total += this[level].RootOccurrenceCount;
            }

            return total;
        }
예제 #6
0
        /// <summary>
        /// Runs the whole mining process once: canonicalizes and indexes the trees, generates the
        /// 1-/2-patterns, then walks the depths from <c>MaxDepth - 1</c> down to 0 calling
        /// <c>Combine</c> and <c>Connect</c>, and finally collects the results.
        /// </summary>
        /// <param name="treeSet">Trees to mine; must not be null.</param>
        /// <returns>The results produced by <c>CollectResults</c>, including the measured time.</returns>
        /// <exception cref="InvalidOperationException">Mining already finished, or is currently running.</exception>
        /// <exception cref="ArgumentNullException"><paramref name="treeSet"/> is null.</exception>
        internal MiningResults Mine(IList <ITextTree> treeSet)
        {
            if (taskFinished)
            {
                throw new InvalidOperationException("Mining has been performed, check the result.");
            }

            if (IsMining)
            {
                throw new InvalidOperationException("It has been mining.");
            }

            if (treeSet == null)
            {
                throw new ArgumentNullException("treeSet");
            }

            try
            {
                IsMining = true;

                Canonicalize(treeSet);
                BuildPreorderIndex(treeSet);

                // Only the pattern generation and extension phases are timed.
                var stopwatch = Stopwatch.StartNew();

                MaxDepth = GenerateF1F2(treeSet);

                // Combine at each level, then connect at the level directly above it
                // (the last iteration therefore calls Connect with -1).
                for (var level = MaxDepth - 1; level >= 0; level--)
                {
                    Combine(level);
                    Connect(level - 1);
                }

                stopwatch.Stop();

                return CollectResults(stopwatch);
            }
            finally
            {
                // The miner is single-use: it is marked finished even when mining throws.
                IsMining     = false;
                taskFinished = true;
            }
        }
예제 #7
0
        /// <summary>
        /// Splits <paramref name="rDi"/> into related groups and, inside each non-empty group,
        /// starts a <c>Traversal</c> for every (pattern, index) pair of that group.
        /// </summary>
        /// <param name="rDi">Patterns to be divided into related groups.</param>
        /// <param name="depth">Depth passed through to <c>Traversal</c>.</param>
        private void StartTraversal(IEnumerable <PatternTree> rDi, Depth depth)
        {
            foreach (var related in DevideToRelatedGroups(rDi))
            {
                if (related.Count < 1)
                {
                    continue;
                }

                foreach (var seed in related)
                {
                    var partnerIndex = 0;
                    while (partnerIndex < related.Count)
                    {
                        Traversal(seed, partnerIndex, related, depth);
                        partnerIndex++;
                    }
                }
            }
        }
예제 #8
0
        /// <summary>
        /// Connects the connectable 2-patterns at <paramref name="depth"/> with the patterns at
        /// <c>depth + 1</c> whose first symbol equals the 2-pattern's second symbol. Afterwards the
        /// closed / maximal status is determined (when enabled in <c>MiningParams</c>) and the
        /// pruner is run.
        /// </summary>
        /// <param name="depth">Depth of the 2-patterns used as the connecting side.</param>
        protected override void Connect(Depth depth)
        {
            var lowerDepth = depth + 1;
            var connectors = PatternsFrequent.GetConnectableAtDepth(depth);
            var receivers  = PatternsFrequent.GetToBeConnectableAtDepth(lowerDepth);

            foreach (var connector in connectors)
            {
                foreach (var receiver in SelectPatternsOfSameRoot(connector.SecondSymbol, receivers, lowerDepth))
                {
                    var extendedKey = connector.ConnectPreorderRepresentation(receiver).ToPreorderString(MiningParams.Separator);

                    // Skip results that were already extended; only then look for a new occurrence.
                    var notYetExtended = !PatternsExtended.AlreadyExtended(extendedKey);
                    if (notYetExtended && connector.HasNewConnectOccurrenceAtDepth(receiver, depth))
                    {
                        ConnectTwoPatterns(connector, receiver, depth);
                    }
                }
            }

            if (MiningParams.MineClosed)
            {
                DetermineClosed(depth);
            }

            if (MiningParams.MineMaximal)
            {
                DetermineMaximal(depth);
            }

            Pruner.PruneAfterConnection(PatternsFrequent, MiningParams, depth);
        }
예제 #9
0
        /// <summary>
        /// Counts the distinct tree ids that have an occurrence at any recorded depth strictly
        /// smaller than <paramref name="depth"/>.
        /// </summary>
        /// <param name="depth">Exclusive upper bound of the depths that are inspected.</param>
        /// <param name="includingRoot">When false, depth 0 is left out.</param>
        /// <returns>The number of distinct tree ids found.</returns>
        internal int TransactionSupportAbove(Depth depth, bool includingRoot)
        {
            // Depth 0 is only visited when the caller asks for the root level to be included.
            var lowestLevel = includingRoot ? 0 : 1;
            var seenTrees   = new HashSet <TreeId>();

            for (var level = depth - 1; level >= lowestLevel; level--)
            {
                if (ContainsDepth(level))
                {
                    foreach (TreeOccSet occSet in this[level].GetTreeSet())
                    {
                        seenTrees.Add(occSet.TreeId);
                    }
                }
            }

            return seenTrees.Count;
        }
예제 #10
0
        /// <summary>
        /// Initializes an induced occurrence after validating its arguments.
        /// </summary>
        /// <param name="treeId">Id of the tree holding the occurrence; must not be null or empty.</param>
        /// <param name="depth">Depth of the occurrence; must be 0 or larger.</param>
        /// <param name="preorderCode">Preorder indexes of the occurrence; must contain at least one entry.</param>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <paramref name="treeId"/> is null or empty, or <paramref name="depth"/> is negative.
        /// </exception>
        /// <exception cref="ArgumentNullException"><paramref name="preorderCode"/> is null or empty.</exception>
        OccInduced(TreeId treeId, Depth depth, IList <int> preorderCode)
        {
            if (string.IsNullOrEmpty(treeId))
            {
                throw new ArgumentOutOfRangeException("treeId");
            }

            if (depth < 0)
            {
                throw new ArgumentOutOfRangeException("depth", "Depth of an occurrence should be larger than or equal to 0.");
            }

            var codeIsMissing = preorderCode == null || preorderCode.Count <= 0;
            if (codeIsMissing)
            {
                throw new ArgumentNullException("preorderCode");
            }

            this.treeId       = treeId;
            this.depth        = depth;
            this.preorderCode = new ReadOnlyCollection <int>(preorderCode);

            // Only occurrences with exactly two indexes start out as connectable.
            AbleToConnect = (PreorderCode.Count == 2);

            // An occurrence whose first preorder index is 0 cannot be connected to.
            var firstIndex = preorderCode[0];
            AbleToBeConnected = firstIndex > 0;
        }
예제 #11
0
 /// <summary>
 /// Delegates to <c>IsInducedSuperPattern</c> with the configured back-track symbol.
 /// </summary>
 /// <param name="largerPt">Pattern passed to <c>IsInducedSuperPattern</c> for comparison with this one.</param>
 /// <param name="depth">Not used by this implementation.</param>
 /// <returns>The result of <c>IsInducedSuperPattern</c>.</returns>
 internal bool IsSuperPattern(PatternTree largerPt, Depth depth)
 {
     var backTrackSymbol = MiningParams.BackTrackSymbol;

     return this.IsInducedSuperPattern(largerPt, backTrackSymbol);
 }
예제 #12
0
 /// <summary>
 /// Factory for induced occurrences; wraps the non-public <c>OccInduced</c> constructor.
 /// </summary>
 /// <param name="treeId">Id of the tree holding the occurrence.</param>
 /// <param name="depth">Depth of the occurrence.</param>
 /// <param name="preorderCode">Preorder indexes of the occurrence.</param>
 /// <returns>The newly created occurrence.</returns>
 internal static IOccurrence Create(TreeId treeId, Depth depth, IList <int> preorderCode)
 {
     IOccurrence occurrence = new OccInduced(treeId, depth, preorderCode);

     return occurrence;
 }
예제 #13
0
 /// <summary>
 /// Creates an occurrence set for the given depth with a root occurrence count of zero.
 /// </summary>
 /// <param name="depth">Depth this set belongs to.</param>
 internal DepthOccSet(Depth depth)
 {
     RootOccurrenceCount = 0;
     this.depth          = depth;
 }
예제 #14
0
        /// <summary>
        /// Picks from <paramref name="fDi"/> the patterns whose first symbol equals
        /// <paramref name="symbol"/>, that are able to be connected, and that have occurrences
        /// at <paramref name="depth"/>.
        /// </summary>
        /// <param name="symbol">Symbol the first symbol of a pattern must equal.</param>
        /// <param name="fDi">Candidate patterns; may be null or empty.</param>
        /// <param name="depth">Depth a selected pattern must contain.</param>
        /// <returns>A new list with the matching patterns; empty when nothing matches.</returns>
        private IEnumerable <PatternTree> SelectPatternsOfSameRoot(NodeSymbol symbol, ICollection <PatternTree> fDi, Depth depth)
        {
            var selected = new List <PatternTree>();

            if (fDi != null && fDi.Count > 0)
            {
                foreach (var candidate in fDi)
                {
                    if (candidate.FirstSymbol == symbol && candidate.AbleToBeConnected && candidate.ContainsDepth(depth))
                    {
                        selected.Add(candidate);
                    }
                }
            }

            return selected;
        }
예제 #15
0
        /// <summary>
        /// Removes from <paramref name="fRecorder"/> the fan-out-1 frequent patterns at
        /// <paramref name="depth"/> that are marked redundant for closed mining. Does nothing when
        /// frequent patterns are mined, or when neither closed nor maximal patterns are mined.
        /// </summary>
        /// <remarks>
        /// Patterns (2-patterns excluded) are grouped by "first symbol,second symbol". Within a group,
        /// a pattern is redundant when another, larger pattern of the group has the same transaction
        /// and root support, passes the <c>IsSuperPattern</c> check, and the difference in the number
        /// of right-most occurrences stays below the active support threshold.
        /// </remarks>
        /// <param name="fRecorder">Recorder holding the frequent patterns.</param>
        /// <param name="param">Mining parameters (mining modes, support type and thresholds).</param>
        /// <param name="depth">Depth whose patterns are examined.</param>
        private static void PruneCannotBeClosed(PatternRecorderFrequent fRecorder, MiningParams param, Depth depth)
        {
            if (param.MineFrequent || (!param.MineClosed && !param.MineMaximal))
            {
                return;
            }

            var candidates = fRecorder.GetFanout1FrequentsAtDepth(depth);

            candidates.Sort();

            // Group the candidates by the pair (first symbol, second symbol).
            var groups = new Dictionary <NodeSymbol, List <PatternTree> >();

            foreach (var candidate in candidates)
            {
                if (candidate.Is2Pattern)
                {
                    continue;
                }

                var groupKey = candidate.FirstSymbol + "," + candidate.SecondSymbol;

                List <PatternTree> members;
                if (!groups.TryGetValue(groupKey, out members))
                {
                    members = new List <PatternTree>();
                    groups.Add(groupKey, members);
                }

                members.Add(candidate);
            }

            foreach (var entry in groups)
            {
                var members       = entry.Value;
                var keysRedundant = new HashSet <string>();

                for (var outer = 0; outer < members.Count; outer++)
                {
                    var smaller = members[outer];

                    for (var inner = 0; inner < members.Count; inner++)
                    {
                        if (inner == outer)
                        {
                            continue;
                        }

                        var larger = members[inner];

                        // Only a strictly larger pattern with identical supports can make 'smaller' redundant.
                        var comparable = smaller.Size < larger.Size
                                         && smaller.TransactionSupport == larger.TransactionSupport
                                         && smaller.RootSupport == larger.RootSupport;

                        if (!comparable || !smaller.IsSuperPattern(larger, depth))
                        {
                            continue;
                        }

                        var maxDif = (param.SupportType == SupportType.Transaction)
                            ? param.ThresholdTransaction : param.ThresholdRoot;

                        if (smaller.NumberOfRightMostOcc - larger.NumberOfRightMostOcc < maxDif)
                        {
                            // One witness is enough; stop looking for further ones.
                            keysRedundant.Add(smaller.PreorderString);
                            break;
                        }
                    }
                }

                fRecorder.RemoveRedundantForClosed(keysRedundant);
                Debug.WriteLine("Depth:{0} RemoveRedundantForClosed Number={1}", depth, keysRedundant.Count);
            }
        }
예제 #16
0
        /// <summary>
        /// Clears this pattern's extension flags when the support it has at the depths above
        /// <c>depth + 1</c> is below the threshold(s) selected by <c>param.SupportType</c>.
        /// </summary>
        /// <remarks>
        /// <para>
        /// <c>AbleToCombine</c> and <c>AbleToConnect</c> are judged on support that includes depth 0;
        /// <c>AbleToBeConnected</c> is judged on support that excludes depth 0.
        /// </para>
        /// <para>
        /// For <c>SupportType.Hybrid</c> a flag survives only when both the transaction threshold and
        /// the root threshold are met. The root support is only computed when the transaction
        /// threshold is met.
        /// </para>
        /// </remarks>
        /// <param name="param">Mining parameters providing the support type and thresholds.</param>
        /// <param name="depth">Depth just connected; supports are computed with <c>depth + 1</c> as the exclusive bound.</param>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <c>param.SupportType</c> is not Transaction, RootOccurrence or Hybrid.
        /// </exception>
        internal void PruneAfterConnection(MiningParams param, Depth depth)
        {
            switch (param.SupportType)
            {
            case SupportType.Transaction:
            {
                if (AbleToCombine || AbleToConnect)
                {
                    var t = TransactionSupportAbove(depth + 1, true);
                    if (t < param.ThresholdTransaction)
                    {
                        AbleToCombine = false;
                        AbleToConnect = false;
                    }
                }

                if (AbleToBeConnected)
                {
                    var t = TransactionSupportAbove(depth + 1, false);
                    if (t < param.ThresholdTransaction)
                    {
                        AbleToBeConnected = false;
                    }
                }
            }
            break;

            case SupportType.RootOccurrence:
            {
                if (AbleToCombine || AbleToConnect)
                {
                    var r = RootSupportAbove(depth + 1, true);
                    if (r < param.ThresholdRoot)
                    {
                        AbleToCombine = false;
                        AbleToConnect = false;
                    }
                }

                if (AbleToBeConnected)
                {
                    var r = RootSupportAbove(depth + 1, false);
                    if (r < param.ThresholdRoot)
                    {
                        AbleToBeConnected = false;
                    }
                }
            }
            break;

            case SupportType.Hybrid:
            {
                if (AbleToCombine || AbleToConnect)
                {
                    var t = TransactionSupportAbove(depth + 1, true);
                    if (t < param.ThresholdTransaction)
                    {
                        AbleToCombine = false;
                        AbleToConnect = false;
                    }
                    else
                    {
                        var r = RootSupportAbove(depth + 1, true);
                        if (r < param.ThresholdRoot)
                        {
                            AbleToCombine = false;
                            AbleToConnect = false;
                        }
                    }
                }

                if (AbleToBeConnected)
                {
                    // The transaction threshold must be compared with the transaction support.
                    // (This check previously used the root support, so the same root count
                    // was tested against both thresholds.)
                    var t = TransactionSupportAbove(depth + 1, false);
                    if (t < param.ThresholdTransaction)
                    {
                        AbleToBeConnected = false;
                    }
                    else
                    {
                        var r = RootSupportAbove(depth + 1, false);
                        if (r < param.ThresholdRoot)
                        {
                            AbleToBeConnected = false;
                        }
                    }
                }
            }
            break;

            default:
                throw new ArgumentOutOfRangeException();
            }
        }
예제 #17
0
 /// <summary>
 /// Combine step for the given depth; the implementation is supplied by the derived miner.
 /// </summary>
 /// <param name="depth">Depth at which patterns are combined.</param>
 protected abstract void Combine(Depth depth);
예제 #18
0
 /// <summary>
 /// Returns the recorded closed patterns that have occurrences at <paramref name="depth"/>.
 /// </summary>
 /// <param name="depth">Depth a returned pattern must contain.</param>
 /// <returns>A new array with the matching patterns; empty when there are none.</returns>
 internal PatternTree[] GetClosedAtDepth(Depth depth)
 {
     var closedAtDepth = from pt in Closeds.Values
                         where pt.ContainsDepth(depth)
                         select pt;

     return closedAtDepth.ToArray();
 }
예제 #19
0
        /// <summary>
        /// Checks whether connecting the 2-pattern <paramref name="p2"/> (at <paramref name="depth"/>)
        /// with <paramref name="pt"/> (at <c>depth + 1</c>) can yield at least one occurrence: some
        /// right-most occurrence of <paramref name="p2"/> must end on a node that is the root of an
        /// occurrence of <paramref name="pt"/> in the same tree.
        /// </summary>
        /// <param name="p2">Connecting pattern; must be a 2-pattern.</param>
        /// <param name="pt">Pattern to be connected.</param>
        /// <param name="depth">Depth of the occurrences of <paramref name="p2"/> that are inspected.</param>
        /// <returns>True when such an occurrence exists; otherwise false.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="p2"/> or <paramref name="pt"/> is null.</exception>
        /// <exception cref="ArgumentException"><paramref name="p2"/> is not a 2-pattern.</exception>
        internal static bool HasNewConnectOccurrenceAtDepth(this PatternTree p2, PatternTree pt, Depth depth)
        {
            if (p2 == null)
            {
                throw new ArgumentNullException("p2");
            }

            if (!p2.Is2Pattern)
            {
                throw new ArgumentException("The connect pattern must be a 2-pattern.");
            }

            if (pt == null)
            {
                throw new ArgumentNullException("pt");
            }

            if (p2.SecondSymbol != pt.FirstSymbol)
            {
                return false;
            }

            var upperDepth = depth;
            var lowerDepth = upperDepth + 1;

            var bothPresent = p2.ContainsDepth(upperDepth) && pt.ContainsDepth(lowerDepth);
            if (!bothPresent)
            {
                return false;
            }

            // For every tree that contains p2 at 'upperDepth' and pt at 'lowerDepth'...
            foreach (TreeOccSet treeOccs in p2[upperDepth].GetTreeSet())
            {
                if (pt.ContainsTreeAtDepth(lowerDepth, treeOccs.TreeId))
                {
                    // ...look at the right-most occurrences below every root occurrence of p2.
                    foreach (RootOcc rootOcc in treeOccs.GetRootSet())
                    {
                        foreach (IOccurrence leafOcc in rootOcc.GetRightMostSet())
                        {
                            // A leaf of p2 that is also a root of pt means a new pattern should be extended.
                            if (pt[lowerDepth][treeOccs.TreeId].ContainsRootIndex(leafOcc.RightMostIndex))
                            {
                                return true;
                            }
                        }
                    }
                }
            }

            return false;
        }
예제 #20
0
        /// <summary>
        /// Combines <paramref name="xPattern"/> with the group member at <paramref name="yIndex"/>.
        /// When the combined pattern was already extended it is looked up; otherwise it is built if a
        /// new combine occurrence exists. A resulting pattern is then recursively combined with every
        /// member of <paramref name="group"/>.
        /// </summary>
        /// <param name="xPattern">Left-hand pattern of the combination.</param>
        /// <param name="yIndex">Index in <paramref name="group"/> of the right-hand pattern.</param>
        /// <param name="group">Related patterns that may be combined with each other.</param>
        /// <param name="depth">Depth at which the combination is performed.</param>
        private void Traversal(PatternTree xPattern, int yIndex, IList <PatternTree> group, Depth depth)
        {
            var partner     = group[yIndex];
            var combinedKey = xPattern.CombinePreorderRepresentation(partner).ToPreorderString(MiningParams.Separator);

            PatternTree combined;

            if (PatternsExtended.AlreadyExtended(combinedKey))
            {
                combined = PatternsFrequent.GetPatternAtDepth(combinedKey, depth);
            }
            else if (xPattern.HasNewCombineOccurrenceAtDepth(partner, depth))
            {
                combined = Combine2Patterns(xPattern, partner, depth);
            }
            else
            {
                return;
            }

            if (combined == null)
            {
                return;
            }

            // Try to grow the new pattern further with every member of the group.
            var next = 0;
            while (next < group.Count)
            {
                Traversal(combined, next, group, depth);
                next++;
            }
        }
예제 #21
0
 /// <summary>
 /// Connect step for the given depth; the implementation is supplied by the derived miner.
 /// </summary>
 /// <param name="depth">Depth at which patterns are connected.</param>
 // NOTE(review): the Mine loop visible in this file calls Connect(--depth) after Combine(depth),
 // so the final call appears to pass -1 — confirm implementations are expected to handle that.
 protected abstract void Connect(Depth depth);
예제 #22
0
        /// <summary>
        /// Lets every frequent pattern at <c>depth + 1</c> update its extension flags via
        /// <c>PruneAfterConnection</c>, then asks the recorder to remove the patterns at
        /// <c>depth + 1</c> that cannot be extended.
        /// </summary>
        /// <param name="fRecorder">Recorder holding the frequent patterns.</param>
        /// <param name="param">Mining parameters forwarded to each pattern.</param>
        /// <param name="depth">Depth just connected; patterns at <c>depth + 1</c> are processed.</param>
        private static void PruneCannotBeExtended(PatternRecorderFrequent fRecorder, MiningParams param, Depth depth)
        {
            var prunedDepth = depth + 1;

            foreach (var pattern in fRecorder.GetFrequentsAtDepth(prunedDepth))
            {
                pattern.PruneAfterConnection(param, depth);
            }

            fRecorder.RemoveCannotBeExtended(prunedDepth);
        }
 /// <summary>
 /// Creates a root occurrence record from the given tree id, depth and root index.
 /// </summary>
 /// <param name="treeId">Id of the tree the root occurrence belongs to.</param>
 /// <param name="depth">Depth stored for the root occurrence.</param>
 /// <param name="rootIndex">Preorder index stored as the root of the occurrence.</param>
 public RootOcc(TreeId treeId, Depth depth, PreorderIndex rootIndex)
 {
     this.rootIndex = rootIndex;
     this.depth     = depth;
     this.treeId    = treeId;
 }
예제 #24
0
 /// <summary>
 /// Tells whether an occurrence set is recorded for <paramref name="depth"/>.
 /// </summary>
 /// <param name="depth">Depth to look up in <c>DepthOccSet</c>.</param>
 /// <returns>True when <c>DepthOccSet</c> has an entry for the depth.</returns>
 internal bool ContainsDepth(Depth depth)
 {
     var hasEntry = DepthOccSet.ContainsKey(depth);

     return hasEntry;
 }
예제 #25
0
 /// <summary>
 /// Tells whether the occurrence set recorded for <paramref name="depth"/> contains the tree
 /// <paramref name="treeId"/>.
 /// </summary>
 /// <param name="depth">Depth to look up in <c>DepthOccSet</c>.</param>
 /// <param name="treeId">Id of the tree to look for at that depth.</param>
 /// <returns>False when the depth is not recorded; otherwise the result of <c>ContainsTree</c>.</returns>
 internal bool ContainsTreeAtDepth(Depth depth, TreeId treeId)
 {
     if (!DepthOccSet.ContainsKey(depth))
     {
         return false;
     }

     return DepthOccSet[depth].ContainsTree(treeId);
 }
예제 #26
0
        /// <summary>
        /// Builds the pattern obtained by connecting the 2-pattern <paramref name="f2"/> with
        /// <paramref name="fpt"/>, collects its occurrences for every depth from
        /// <paramref name="depth"/> down to 0, and records it as frequent when it qualifies.
        /// </summary>
        /// <remarks>
        /// The new pattern is always registered in <c>PatternsExtended</c>. Only a frequent result is
        /// added to <c>PatternsFrequent</c>, linked to its two parents and checked against them.
        /// </remarks>
        /// <param name="f2">Connecting pattern; its <c>Size</c> must be 2.</param>
        /// <param name="fpt">Pattern to be connected, taken one level below <paramref name="f2"/>.</param>
        /// <param name="depth">Highest depth of <paramref name="f2"/> occurrences to consider.</param>
        /// <exception cref="InvalidOperationException"><paramref name="f2"/> is not of size 2.</exception>
        private void ConnectTwoPatterns(PatternTree f2, PatternTree fpt, Depth depth)
        {
            if (f2.Size != 2)
            {
                throw new InvalidOperationException("The connect pattern must be 2-pattern.");
            }

            var connectedCode = f2.ConnectPreorderRepresentation(fpt);
            var connected     = PatternTree.Create(connectedCode, true, MiningParams);

            PatternsExtended.AddPattern(connected);

            // upper: depth of the connecting pattern; lower: depth of the pattern to be connected.
            for (var upper = depth; upper >= 0; upper--)
            {
                var lower = upper + 1;

                if (!f2.ContainsDepth(upper) || !fpt.ContainsDepth(lower))
                {
                    continue;
                }

                foreach (TreeOccSet treeOccs in f2[upper].GetTreeSet())
                {
                    if (!fpt.ContainsTreeAtDepth(lower, treeOccs.TreeId))
                    {
                        continue;
                    }

                    foreach (RootOcc rootOcc in treeOccs.GetRootSet())
                    {
                        foreach (IOccurrence upperOcc in rootOcc.GetRightMostSet())
                        {
                            // Connect only where the second node of the 2-occurrence is a root of fpt.
                            if (fpt[lower][treeOccs.TreeId].RootSet.ContainsKey(upperOcc.SecondIndex))
                            {
                                var joined = upperOcc.Connect(fpt[lower][treeOccs.TreeId][upperOcc.SecondIndex].FirstOcc);
                                connected.AddOccurrence(joined);
                            }
                        }
                    }
                }
            }

            if (connected.IsFrequent)
            {
                PatternsFrequent.AddFrequentPattern(connected);

                connected.Father = f2;
                connected.Mother = fpt;

                f2.CheckMatch(connected);
                fpt.CheckMatch(connected);
            }
        }
예제 #27
0
 /// <summary>
 /// Connect step override that is not implemented: every call throws.
 /// </summary>
 /// <param name="depth">Unused.</param>
 /// <exception cref="NotImplementedException">Always thrown.</exception>
 protected override void Connect(Depth depth)
 {
     throw new NotImplementedException();
 }
예제 #28
0
        /// <summary>
        /// Pruning entry point run after the connect step. When only closed and/or maximal patterns
        /// are mined (frequent mining disabled), patterns that cannot be closed are pruned first;
        /// patterns that cannot be extended are pruned in every case.
        /// </summary>
        /// <param name="fRecorder">Recorder holding the frequent patterns.</param>
        /// <param name="param">Mining parameters selecting the mining modes.</param>
        /// <param name="depth">Depth that has just been connected.</param>
        internal static void PruneAfterConnection(PatternRecorderFrequent fRecorder, MiningParams param, Depth depth)
        {
            var closedOrMaximalOnly = !param.MineFrequent && (param.MineClosed || param.MineMaximal);

            if (closedOrMaximalOnly)
            {
                PruneCannotBeClosed(fRecorder, param, depth);
            }

            PruneCannotBeExtended(fRecorder, param, depth);
        }
예제 #29
0
        /// <summary>
        /// Checks whether combining <paramref name="xPattern"/> with <paramref name="yPattern"/> at
        /// <paramref name="depth"/> can yield at least one occurrence: both must share a root occurrence
        /// in the same tree, and some right-most occurrence of <paramref name="yPattern"/> must have a
        /// second index greater than the right-most index of <paramref name="xPattern"/>'s first
        /// occurrence under that root.
        /// </summary>
        /// <param name="xPattern">Left-hand pattern of the combination.</param>
        /// <param name="yPattern">Right-hand pattern of the combination.</param>
        /// <param name="depth">Depth whose occurrences are inspected.</param>
        /// <returns>True when such an occurrence exists; otherwise false.</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="xPattern"/> or <paramref name="yPattern"/> is null.
        /// </exception>
        internal static bool HasNewCombineOccurrenceAtDepth(this PatternTree xPattern, PatternTree yPattern, Depth depth)
        {
            if (xPattern == null)
            {
                throw new ArgumentNullException("xPattern");
            }

            if (yPattern == null)
            {
                throw new ArgumentNullException("yPattern");
            }

            // Both patterns need the same root symbol and occurrences at the requested depth.
            var combinable = xPattern.FirstSymbol == yPattern.FirstSymbol
                             && xPattern.ContainsDepth(depth)
                             && yPattern.ContainsDepth(depth);

            if (!combinable)
            {
                return false;
            }

            foreach (TreeOccSet xTree in xPattern[depth].GetTreeSet())
            {
                if (yPattern.ContainsTreeAtDepth(depth, xTree.TreeId))
                {
                    foreach (RootOcc xRoot in xTree.GetRootSet())
                    {
                        if (yPattern[depth][xTree.TreeId].ContainsRootIndex(xRoot.RootIndex))
                        {
                            foreach (IOccurrence yOcc in yPattern[depth][xTree.TreeId][xRoot.RootIndex].GetRightMostSet())
                            {
                                if (xRoot.FirstOcc.RightMostIndex < yOcc.SecondIndex)
                                {
                                    return true;
                                }
                            }
                        }
                    }
                }
            }

            return false;
        }
예제 #30
0
 /// <summary>
 /// Returns the frequent 2-patterns that are able to connect and have occurrences at
 /// <paramref name="depth"/>.
 /// </summary>
 /// <param name="depth">Depth a returned pattern must contain.</param>
 /// <returns>A new list with the matching 2-patterns; empty when there are none.</returns>
 internal List <PatternTree> GetConnectableAtDepth(Depth depth)
 {
     var connectable = from f2 in Frequent2Pts.Values
                       where f2.AbleToConnect && f2.ContainsDepth(depth)
                       select f2;

     return connectable.ToList();
 }