Beispiel #1
0
        /// <summary>
        /// Finds the frequent sequences of <paramref name="datasetSequences"/> pass by pass:
        /// single-item sequences first, then candidates generated from the frequent
        /// sequences of the previous pass, until a pass yields nothing.
        /// </summary>
        /// <param name="datasetSequences">Dataset; each entry is a sequence of itemsets.</param>
        /// <param name="support">Minimum support, given as a percentage of the number of dataset sequences.</param>
        /// <returns>Every candidate that reached the minimum support, in the order the passes produced them.</returns>
        public List <SequenceSupport> FindSequentialPatterns(List <List <List <string> > > datasetSequences, int support)
        {
            // Turn the percentage into an absolute number of supporting sequences.
            float supportFrequency = ((float)support / 100) * datasetSequences.Count;

            // One single-item candidate per distinct item, in sorted item order.
            List <SequenceSupport> candidates = datasetSequences
                                                .SelectMany(sequence => sequence.SelectMany(itemset => itemset))
                                                .Distinct()
                                                .OrderBy(item => item)
                                                .Select(item => new SequenceSupport
            {
                sequence = new List <List <string> > { new List <string> { item } }
            })
                                                .ToList();

            // Count the single-item candidates and keep those that are frequent.
            candidates = CountSupport(datasetSequences, candidates);
            List <SequenceSupport> frequentSequences = candidates.Where(c => c.support >= supportFrequency).ToList();
            List <List <List <string> > > previousPass = frequentSequences.Select(c => c.sequence).ToList();

            int passNumber = 0;
            System.Diagnostics.Debug.WriteLine("Start Sequence Lenght:" + passNumber++ + " : " + DateTime.Now.TimeOfDay);

            // The first join works on single-item sequences and has its own generator.
            bool isFirstJoin = true;

            while (previousPass.Count > 0)
            {
                System.Diagnostics.Debug.WriteLine("Start Sequence Lenght:" + passNumber++ + " : " + DateTime.Now.TimeOfDay);

                if (isFirstJoin)
                {
                    candidates  = CandidateGenerationC2(previousPass);
                    isFirstJoin = false;
                }
                else
                {
                    candidates = CandidatePruning(CandidateGeneration(previousPass), previousPass);
                }

                candidates = CandidateElimination(SupportCounting(datasetSequences, candidates), supportFrequency);

                // Survivors are both part of the result and the seed of the next pass.
                frequentSequences.AddRange(candidates);
                previousPass = candidates.Select(c => c.sequence).ToList();
            }

            return(frequentSequences);
        }
Beispiel #2
0
        /// <summary>
        /// Tells whether <paramref name="candidate"/> is still missing from <paramref name="candidates"/>.
        /// </summary>
        /// <param name="candidates">Candidates collected so far.</param>
        /// <param name="candidate">Candidate whose sequence is looked up.</param>
        /// <returns>
        /// <c>false</c> as soon as CompareListofLists reports a match between an existing
        /// sequence and the candidate's sequence; <c>true</c> when no entry matches
        /// (including when the list is empty).
        /// </returns>
        private bool candidateNotExist(List <SequenceSupport> candidates, SequenceSupport candidate)
        {
            foreach (SequenceSupport existingCandidate in candidates)
            {
                if (CompareListofLists(existingCandidate.sequence, candidate.sequence))
                {
                    // The first match settles the answer; comparing the remaining
                    // entries would only cost time.
                    // NOTE(review): assumes CompareListofLists has no side effects - confirm.
                    return(false);
                }
            }
            return(true);
        }
Beispiel #3
0
        /// <summary>
        /// Generates the two-item candidates from the frequent single-item sequences.
        /// </summary>
        /// <remarks>
        /// For every ordered pair (x, y) of input sequences, including x paired with itself,
        /// the two-itemset sequence {x}{y} is emitted. The single-itemset sequence {x, y} is
        /// emitted in addition only when x compares before y, so each unordered pair
        /// yields it once.
        /// </remarks>
        /// <param name="lastFrequentSequences">Frequent sequences of the previous pass; only the first item of the first itemset of each is read.</param>
        /// <returns>The generated candidates; support is not computed here.</returns>
        private List <SequenceSupport> CandidateGenerationC2(List <List <List <string> > > lastFrequentSequences)
        {
            List <SequenceSupport> generated = new List <SequenceSupport>();

            foreach (List <List <string> > left in lastFrequentSequences)
            {
                foreach (List <List <string> > right in lastFrequentSequences)
                {
                    string leftItem  = left.First()[0];
                    string rightItem = right.First()[0];

                    // leftItem followed by rightItem, each in its own itemset.
                    generated.Add(new SequenceSupport
                    {
                        sequence = new List <List <string> >
                        {
                            new List <string> { leftItem },
                            new List <string> { rightItem }
                        }
                    });

                    if (string.Compare(leftItem, rightItem) >= 0)
                    {
                        continue;
                    }

                    // Both items together in a single itemset.
                    generated.Add(new SequenceSupport
                    {
                        sequence = new List <List <string> >
                        {
                            new List <string> { leftItem, rightItem }
                        }
                    });
                }
            }
            return(generated);
        }
Beispiel #4
0
        /// <summary>
        /// Loads the stored frequent sequential patterns whose Train flag is true and
        /// parses their textual sequence into itemsets.
        /// </summary>
        /// <remarks>
        /// The sequence text is first cut into itemsets at '(', ')', '&lt;' and '&gt;',
        /// then each itemset is cut into items at spaces and '-'. Empty pieces are dropped
        /// at both levels.
        /// </remarks>
        /// <returns>One entry per stored pattern, carrying its support and parsed sequence.</returns>
        private List <SequenceSupport> ReadFrequentSequences()
        {
            char[] itemsetDelimiters = new[] { '(', ')', '<', '>' };
            char[] itemDelimiters    = new[] { ' ', '-' };

            List <SequenceSupport> loaded = new List <SequenceSupport>();

            foreach (var pattern in _dbContext.FrequentSequentialPattern.Where(r => r.Train == true))
            {
                List <List <string> > itemsets = new List <List <string> >();

                foreach (string itemsetText in pattern.Sequence.Split(itemsetDelimiters, StringSplitOptions.RemoveEmptyEntries))
                {
                    itemsets.Add(new List <string>(itemsetText.Split(itemDelimiters, StringSplitOptions.RemoveEmptyEntries)));
                }

                loaded.Add(new SequenceSupport
                {
                    support  = pattern.Support,
                    sequence = itemsets
                });
            }
            return(loaded);
        }
Beispiel #5
0
        /// <summary>
        /// Builds the candidates for the next pass by joining the frequent sequences of
        /// the previous pass with each other.
        /// </summary>
        /// <remarks>
        /// For every input sequence, each way of dropping one item from its first itemset
        /// is combined with each way of dropping one item from its last itemset (an itemset
        /// that holds a single item is removed entirely). Two sequences s1 and s2 are then
        /// joined when a "first dropped" form of s1 matches, per CompareListofLists, a
        /// "last dropped" form of s2. The item dropped from s2 is appended to a copy of s1:
        /// as a new single-item itemset when the last itemset of s2 has exactly one item,
        /// otherwise into the last itemset of s1 unless it is already present there.
        /// Candidates already collected (per candidateNotExist) are skipped.
        /// </remarks>
        /// <param name="lastFrequentSequences">Frequent sequences of the previous pass; each must contain at least one itemset.</param>
        /// <returns>The joined candidates; support is not computed here.</returns>
        private List <SequenceSupport> CandidateGeneration(List <List <List <string> > > lastFrequentSequences)
        {
            List <SequenceTrimed> trimmedSequences = new List <SequenceTrimed>();

            foreach (List <List <string> > sequence in lastFrequentSequences)
            {
                List <List <string> > original = new List <List <string> >(sequence);

                // Both variant lists are derived from the untouched sequence, so dropping an
                // item from the first itemset never leaks into the "last dropped" form.
                var firstVariants = DropOneItem(sequence, 0);
                var lastVariants  = DropOneItem(sequence, sequence.Count - 1);

                foreach (var first in firstVariants)
                {
                    foreach (var last in lastVariants)
                    {
                        // A fresh entry per combination: re-adding one mutated instance
                        // would make all entries share the last state if SequenceTrimed
                        // is a reference type.
                        SequenceTrimed trimmed = new SequenceTrimed();
                        trimmed.sequenceOriginal     = original;
                        trimmed.sequenceRemovedFirst = CloneItemsets(first.Key);
                        trimmed.sequenceRemovedLast  = CloneItemsets(last.Key);
                        trimmed.lastItem             = last.Value;
                        trimmedSequences.Add(trimmed);
                    }
                }
            }

            List <SequenceSupport> candidates = new List <SequenceSupport>();

            foreach (SequenceTrimed left in trimmedSequences)
            {
                foreach (SequenceTrimed right in trimmedSequences)
                {
                    if (!CompareListofLists(left.sequenceRemovedFirst, right.sequenceRemovedLast))
                    {
                        continue;
                    }

                    SequenceSupport candidate = new SequenceSupport();
                    if (right.sequenceOriginal.Last().Count == 1)
                    {
                        // The dropped item was an itemset of its own: append it as one.
                        candidate.sequence = CloneItemsets(left.sequenceOriginal);
                        candidate.sequence.Add(new List <string>()
                        {
                            right.lastItem
                        });
                    }
                    else if (!left.sequenceOriginal.Last().Contains(right.lastItem))
                    {
                        // The dropped item shared an itemset: merge it into the last itemset.
                        candidate.sequence = CloneItemsets(left.sequenceOriginal);
                        candidate.sequence.Last().Add(right.lastItem);
                    }
                    else
                    {
                        // The item is already in the last itemset; nothing new to build.
                        continue;
                    }

                    if (candidateNotExist(candidates, candidate))
                    {
                        candidates.Add(candidate);
                    }
                }
            }
            return(candidates);
        }

        // Returns every variant of the sequence with one item dropped from the itemset at
        // itemsetIndex, paired with the dropped item. An itemset that holds a single item
        // is removed from the variant altogether; an empty itemset yields no variants.
        private static List <KeyValuePair <List <List <string> >, string> > DropOneItem(List <List <string> > sequence, int itemsetIndex)
        {
            var variants = new List <KeyValuePair <List <List <string> >, string> >();

            foreach (string item in sequence[itemsetIndex])
            {
                List <List <string> > variant = CloneItemsets(sequence);
                if (variant[itemsetIndex].Count == 1)
                {
                    variant.RemoveAt(itemsetIndex);
                }
                else
                {
                    variant[itemsetIndex].Remove(item);
                }
                variants.Add(new KeyValuePair <List <List <string> >, string>(variant, item));
            }
            return(variants);
        }

        // Copies the sequence together with each of its itemsets, so that editing the
        // copy never touches the source lists.
        private static List <List <string> > CloneItemsets(List <List <string> > sequence)
        {
            return(sequence.ConvertAll(itemset => new List <string>(itemset)));
        }