/// <summary>
/// Mines frequent sequences from <paramref name="datasetSequences"/> level by level.
/// Single-item sequences are counted first; each following pass builds candidates from
/// the sequences kept by the previous pass and stops once a pass keeps nothing.
/// </summary>
/// <param name="datasetSequences">Dataset of sequences; each sequence is a list of itemsets, each itemset a list of items.</param>
/// <param name="support">Minimum support as a percentage; converted to an absolute count of dataset sequences.</param>
/// <returns>Every sequence kept at any level, in the order the levels were processed.</returns>
public List<SequenceSupport> FindSequentialPatterns(List<List<List<string>>> datasetSequences, int support)
{
    // Percentage threshold expressed as a number of dataset sequences.
    float minimumCount = ((float)support / 100) * datasetSequences.Count();

    // Level 1: one single-item sequence per distinct item, in sorted item order.
    List<SequenceSupport> levelCandidates = datasetSequences
        .SelectMany(sequence => sequence.SelectMany(itemset => itemset))
        .Distinct()
        .OrderBy(item => item)
        .Select(item => new SequenceSupport
        {
            sequence = new List<List<string>> { new List<string> { item } }
        })
        .ToList();

    // Count the single-item candidates and keep those that reach the threshold.
    levelCandidates = CountSupport(datasetSequences, levelCandidates);
    List<SequenceSupport> allFrequent = levelCandidates
        .Where(entry => entry.support >= minimumCount)
        .ToList();
    List<List<List<string>>> previousLevel = allFrequent
        .Select(entry => entry.sequence)
        .ToList();

    // The counter is logged first and advanced afterwards, so this message reports 0.
    int level = 0;
    System.Diagnostics.Debug.WriteLine("Start Sequence Lenght:" + level + " : " + DateTime.Now.TimeOfDay);
    level++;

    bool isFirstJoin = true;
    while (previousLevel.Count > 0)
    {
        System.Diagnostics.Debug.WriteLine("Start Sequence Lenght:" + level + " : " + DateTime.Now.TimeOfDay);
        level++;

        // The first pass joins single items with the dedicated generator; later passes
        // use the general generator followed by pruning against the previous level.
        levelCandidates = isFirstJoin
            ? CandidateGenerationC2(previousLevel)
            : CandidatePruning(CandidateGeneration(previousLevel), previousLevel);
        isFirstJoin = false;

        levelCandidates = SupportCounting(datasetSequences, levelCandidates);
        levelCandidates = CandidateElimination(levelCandidates, minimumCount);

        allFrequent.AddRange(levelCandidates);
        previousLevel = levelCandidates.Select(entry => entry.sequence).ToList();
    }

    return allFrequent;
}
/// <summary>
/// Reports whether <paramref name="candidate"/> is absent from <paramref name="candidates"/>,
/// comparing sequences with <c>CompareListofLists</c>.
/// </summary>
/// <param name="candidates">Candidates collected so far.</param>
/// <param name="candidate">Candidate whose sequence is looked up.</param>
/// <returns><c>true</c> when no existing candidate has a matching sequence; otherwise <c>false</c>.</returns>
private bool candidateNotExist(List<SequenceSupport> candidates, SequenceSupport candidate)
{
    foreach (SequenceSupport existingCandidate in candidates)
    {
        if (CompareListofLists(existingCandidate.sequence, candidate.sequence))
        {
            // One match settles the answer; comparing the remaining entries is wasted work.
            return false;
        }
    }

    return true;
}
/// <summary>
/// Builds the two-item candidates from the single-item sequences in
/// <paramref name="lastFrequentSequences"/>.
/// </summary>
/// <param name="lastFrequentSequences">Sequences whose first itemset's first item is used as the item to combine.</param>
/// <returns>
/// For every ordered pair (x, y), including x paired with itself, the two-itemset sequence
/// {x}{y}; and additionally, when x compares before y, the single-itemset sequence {x, y}.
/// </returns>
private List<SequenceSupport> CandidateGenerationC2(List<List<List<string>>> lastFrequentSequences)
{
    List<SequenceSupport> generated = new List<SequenceSupport>();

    for (int i = 0; i < lastFrequentSequences.Count; i++)
    {
        string leading = lastFrequentSequences[i].First()[0];

        for (int j = 0; j < lastFrequentSequences.Count; j++)
        {
            string trailing = lastFrequentSequences[j].First()[0];

            // Two separate itemsets: emitted for every ordered pair.
            generated.Add(new SequenceSupport
            {
                sequence = new List<List<string>>
                {
                    new List<string> { leading },
                    new List<string> { trailing }
                }
            });

            // One shared itemset: emitted once per unordered pair, smaller item first.
            if (string.Compare(leading, trailing) >= 0)
            {
                continue;
            }

            generated.Add(new SequenceSupport
            {
                sequence = new List<List<string>>
                {
                    new List<string> { leading, trailing }
                }
            });
        }
    }

    return generated;
}
/// <summary>
/// Loads the <c>FrequentSequentialPattern</c> rows whose <c>Train</c> flag is true and
/// parses each row's textual sequence into itemsets.
/// </summary>
/// <returns>One <c>SequenceSupport</c> per row, carrying the row's support and its parsed sequence.</returns>
private List<SequenceSupport> ReadFrequentSequences()
{
    // '(', ')', '<' and '>' separate itemsets; ' ' and '-' separate items inside an itemset.
    char[] itemsetDelimiters = { '(', ')', '<', '>' };
    char[] itemDelimiters = { ' ', '-' };

    List<SequenceSupport> loaded = new List<SequenceSupport>();

    foreach (var row in _dbContext.FrequentSequentialPattern.Where(r => r.Train == true))
    {
        string[] fragments = row.Sequence.Split(itemsetDelimiters, StringSplitOptions.RemoveEmptyEntries);

        // NOTE(review): a fragment made up only of item delimiters (for example a space
        // between two bracketed groups) parses to an empty itemset - confirm the stored
        // format never produces one.
        List<List<string>> parsedSequence = fragments
            .Select(fragment => new List<string>(
                fragment.Split(itemDelimiters, StringSplitOptions.RemoveEmptyEntries)))
            .ToList();

        loaded.Add(new SequenceSupport
        {
            support = row.Support,
            sequence = parsedSequence
        });
    }

    return loaded;
}
/// <summary>
/// Builds the next level of candidate sequences by joining the sequences in
/// <paramref name="lastFrequentSequences"/> with each other.
/// </summary>
/// <remarks>
/// Two sequences s1 and s2 are joined when s1 with one item of its first itemset removed
/// equals (per <c>CompareListofLists</c>) s2 with one item of its last itemset removed.
/// The removed last item of s2 is then appended to s1: as a new itemset when s2's last
/// itemset held a single item, otherwise into s1's last itemset (skipped when s1's last
/// itemset already contains it). Candidates already generated are not added twice.
/// </remarks>
/// <param name="lastFrequentSequences">Sequences from the previous level; each is a list of itemsets.</param>
/// <returns>The distinct candidates produced by the join.</returns>
private List<SequenceSupport> CandidateGeneration(List<List<List<string>>> lastFrequentSequences)
{
    // Phase 1: record every (drop-first, drop-last) pairing of each sequence.
    List<SequenceTrimed> trimmedSequences = new List<SequenceTrimed>();
    foreach (List<List<string>> sequence in lastFrequentSequences)
    {
        // Both trimmed forms are derived from the untouched input sequence, so removing
        // an item from the first itemset never leaks into the drop-last form.
        List<KeyValuePair<string, List<List<string>>>> firstRemovals =
            RemoveEachItemFromItemset(sequence, 0);
        List<KeyValuePair<string, List<List<string>>>> lastRemovals =
            RemoveEachItemFromItemset(sequence, sequence.Count - 1);

        foreach (KeyValuePair<string, List<List<string>>> firstRemoval in firstRemovals)
        {
            foreach (KeyValuePair<string, List<List<string>>> lastRemoval in lastRemovals)
            {
                // A fresh entry per pairing: entries must not share one mutable instance,
                // otherwise later pairings would overwrite earlier ones.
                trimmedSequences.Add(new SequenceTrimed
                {
                    sequenceOriginal = new List<List<string>>(sequence),
                    sequenceRemovedFirst = firstRemoval.Value,
                    sequenceRemovedLast = lastRemoval.Value,
                    lastItem = lastRemoval.Key
                });
            }
        }
    }

    // Phase 2: join every pair whose trimmed forms match.
    List<SequenceSupport> candidates = new List<SequenceSupport>();
    foreach (SequenceTrimed left in trimmedSequences)
    {
        foreach (SequenceTrimed right in trimmedSequences)
        {
            if (!CompareListofLists(left.sequenceRemovedFirst, right.sequenceRemovedLast))
            {
                continue;
            }

            bool lastItemStandsAlone = right.sequenceOriginal.Last().Count == 1;
            if (!lastItemStandsAlone && left.sequenceOriginal.Last().Contains(right.lastItem))
            {
                // The item would be duplicated inside the last itemset.
                continue;
            }

            SequenceSupport candidate = new SequenceSupport();
            candidate.sequence = DeepCopySequence(left.sequenceOriginal);
            if (lastItemStandsAlone)
            {
                candidate.sequence.Add(new List<string> { right.lastItem });
            }
            else
            {
                candidate.sequence.Last().Add(right.lastItem);
            }

            if (candidateNotExist(candidates, candidate))
            {
                candidates.Add(candidate);
            }
        }
    }

    return candidates;
}

/// <summary>Copies a sequence together with each of its itemsets.</summary>
private static List<List<string>> DeepCopySequence(List<List<string>> sequence)
{
    return sequence.ConvertAll(itemset => new List<string>(itemset));
}

/// <summary>
/// For each item of the itemset at <paramref name="itemsetIndex"/>, returns the removed item
/// paired with a deep copy of <paramref name="sequence"/> lacking that item. A single-item
/// itemset is removed from the copy entirely instead of being left empty.
/// </summary>
private static List<KeyValuePair<string, List<List<string>>>> RemoveEachItemFromItemset(
    List<List<string>> sequence, int itemsetIndex)
{
    List<KeyValuePair<string, List<List<string>>>> removals =
        new List<KeyValuePair<string, List<List<string>>>>();
    List<string> itemset = sequence[itemsetIndex];

    if (itemset.Count == 1)
    {
        List<List<string>> withoutItemset = DeepCopySequence(sequence);
        withoutItemset.RemoveAt(itemsetIndex);
        removals.Add(new KeyValuePair<string, List<List<string>>>(itemset[0], withoutItemset));
        return removals;
    }

    foreach (string item in itemset)
    {
        List<List<string>> withoutItem = DeepCopySequence(sequence);
        withoutItem[itemsetIndex].Remove(item);
        removals.Add(new KeyValuePair<string, List<List<string>>>(item, withoutItem));
    }

    return removals;
}