/// <summary>
/// Builds a reduced ("mini") contig for each of the first <paramref name="topNumber"/> entries of
/// <paramref name="contigs"/>, centred on the most abundant distinct sequence of that contig.
/// </summary>
/// <remarks>
/// For every processed contig the member sequences are grouped by their sequence string and the
/// counts are summed per group. The group with the highest total is the anchor. A group is kept when
/// its offset inside the contig sequence differs from the anchor's offset by less than 3 and its
/// total count is at least 10% of the anchor's total. The mini contig sequence is the part of the
/// original contig sequence spanned by the kept groups.
/// Contigs that have no member sequences are skipped, so the result may contain fewer items than
/// the number of contigs processed.
/// </remarks>
/// <param name="contigs">Source contigs; only the leading <paramref name="topNumber"/> are processed.</param>
/// <param name="topNumber">Maximum number of contigs to process.</param>
/// <returns>The mini contigs, in the order of their source contigs.</returns>
public static List<SmallRNASequenceContig> BuildMiniContig(List<SmallRNASequenceContig> contigs, int topNumber)
{
    var result = new List<SmallRNASequenceContig>();

    for (int i = 0; i < contigs.Count && i < topNumber; i++)
    {
        var contig = contigs[i];

        // One entry per distinct sequence: summed count and offset inside the contig sequence.
        // Ordinal search: the sequences are plain data, not linguistic text, so the
        // culture-sensitive default of IndexOf(string) is not appropriate here.
        var seqCount = (from seq in contig.Sequences.GroupBy(l => l.Sequence)
                        select new
                        {
                            Seq = seq.Key,
                            SeqCount = seq.Sum(l => l.Count),
                            SeqIndex = contig.ContigSequence.IndexOf(seq.Key, StringComparison.Ordinal)
                        }).OrderByDescending(l => l.SeqCount).ToList();

        // A contig without member sequences has no anchor; indexing seqCount[0] would throw.
        if (seqCount.Count == 0)
        {
            continue;
        }

        // Anchor = most abundant distinct sequence.
        var fq = seqCount[0];

        // Keep sequences starting within 2 positions of the anchor and with at least 10% of its count.
        // NOTE(review): a sequence that is not a substring of ContigSequence gets SeqIndex == -1 and
        // could make the Substring call below throw - presumably every member is contained in the
        // contig sequence; confirm against the code that builds the contigs.
        var list = seqCount.Where(l => Math.Abs(l.SeqIndex - fq.SeqIndex) < 3 && l.SeqCount >= fq.SeqCount * 0.1).ToList();

        var start = list.Min(l => l.SeqIndex);
        var end = list.Max(l => l.SeqIndex + l.Seq.Length);
        var contigSeq = contig.ContigSequence.Substring(start, end - start);

        var contigSequences = new HashSet<string>(list.ConvertAll(l => l.Seq));

        var miniContig = new SmallRNASequenceContig();
        miniContig.ContigSequence = contigSeq;
        miniContig.ContigCount = list.Sum(l => l.SeqCount);
        // Carry over every original entry (all samples) whose sequence string was kept.
        miniContig.Sequences.AddRange(contig.Sequences.Where(l => contigSequences.Contains(l.Sequence)));

        result.Add(miniContig);
    }

    return result;
}
/// <summary>
/// Creates one contig per distinct sequence string found in <paramref name="counts"/> and attaches
/// to it every entry, from every list, that carries exactly that sequence.
/// </summary>
/// <remarks>
/// When <paramref name="topNumber"/> is not <c>int.MaxValue</c>, only the leading
/// <paramref name="topNumber"/> entries of each list decide which sequence strings get a contig.
/// Entries beyond that limit are still attached if their sequence already has a contig.
/// Contig counts are filled in by <c>CalculateNormalizedContigCount</c> using the map returned by
/// <c>GetSampleCountMap</c>.
/// </remarks>
/// <param name="counts">Sequence lists by key (presumably sample name - verify against callers).</param>
/// <param name="topNumber">Per-list limit on the entries used to seed contigs; defaults to no limit.</param>
/// <returns>The contigs ordered by <c>ContigCount</c>, highest first.</returns>
public static List<SmallRNASequenceContig> BuildContigByIdenticalSequence(Dictionary<string, List<SmallRNASequence>> counts, int topNumber = int.MaxValue)
{
    // Entries that are allowed to seed a contig.
    IEnumerable<SmallRNASequence> candidates;
    if (topNumber != int.MaxValue)
    {
        candidates = counts.Values.SelectMany(entries => entries.Take(Math.Min(entries.Count, topNumber)).ToList());
    }
    else
    {
        candidates = counts.Values.SelectMany(entries => entries);
    }

    // Distinct sequence strings, most abundant (by summed count) first.
    var orderedKeys = candidates
        .GroupBy(entry => entry.Sequence)
        .OrderByDescending(group => group.Sum(entry => entry.Count))
        .Select(group => group.Key)
        .ToList();

    // Sequence string -> empty contig named after that string.
    var contigBySequence = new Dictionary<string, SmallRNASequenceContig>();
    foreach (var key in orderedKeys)
    {
        contigBySequence[key] = new SmallRNASequenceContig { ContigSequence = key };
    }

    // Attach every entry of every list to the contig of its sequence, if one exists.
    foreach (var entries in counts.Values)
    {
        foreach (var entry in entries)
        {
            SmallRNASequenceContig target;
            if (contigBySequence.TryGetValue(entry.Sequence, out target))
            {
                target.Sequences.Add(entry);
            }
        }
    }

    // Fill in the contig counts from the per-key totals.
    Dictionary<string, int> sampleTotals = GetSampleCountMap(counts);
    CalculateNormalizedContigCount(contigBySequence.Values.ToList(), sampleTotals);

    return contigBySequence.Values
        .OrderByDescending(contig => contig.ContigCount)
        .ToList();
}
/// <summary>
/// Convenience overload: wraps each sequence in its own single-member contig and forwards the
/// resulting list to the contig-list overload of <c>GetTopContig</c>.
/// </summary>
/// <param name="sequences">Sequences to wrap; each becomes one contig with the same sequence and count.</param>
/// <param name="minimumOverlapRate">Passed through unchanged to the contig-list overload.</param>
/// <param name="maximumExtensionBase">Passed through unchanged to the contig-list overload.</param>
/// <returns>Whatever the contig-list overload returns for the wrapped sequences.</returns>
public static List<SmallRNASequenceContig> GetTopContig(List<SmallRNASequence> sequences, double minimumOverlapRate, int maximumExtensionBase)
{
    List<SmallRNASequenceContig> singletons = sequences.ConvertAll(source =>
    {
        var wrapped = new SmallRNASequenceContig
        {
            ContigSequence = source.Sequence,
            ContigCount = source.Count
        };
        wrapped.Sequences.Add(source);
        return wrapped;
    });

    return GetTopContig(singletons, minimumOverlapRate, maximumExtensionBase);
}