/// <summary>
/// Wraps every input sequence in its own single-member contig and hands the
/// resulting list to the GetTopContig overload that accepts contigs.
/// </summary>
/// <param name="sequences">Sequences used to seed the initial contigs, one contig per sequence.</param>
/// <param name="minimumOverlapRate">Forwarded unchanged to the contig-based overload.</param>
/// <param name="maximumExtensionBase">Forwarded unchanged to the contig-based overload.</param>
/// <returns>The list produced by the contig-based overload for the seeded contigs.</returns>
public static List<SmallRNASequenceContig> GetTopContig(List<SmallRNASequence> sequences, double minimumOverlapRate, int maximumExtensionBase)
{
    // Each seed contig starts out as an exact copy of one sequence:
    // same sequence text, same count, and that sequence as its only member.
    List<SmallRNASequenceContig> seeds = sequences.ConvertAll(source =>
    {
        var seed = new SmallRNASequenceContig
        {
            ContigSequence = source.Sequence,
            ContigCount = source.Count
        };
        seed.Sequences.Add(source);
        return seed;
    });

    return GetTopContig(seeds, minimumOverlapRate, maximumExtensionBase);
}
/// <summary>
/// Builds one contig per distinct sequence string by pooling every entry,
/// across all lists in <paramref name="counts"/>, that carries that exact string.
/// </summary>
/// <param name="counts">Map whose values are lists of sequences; the keys are not read by this method.</param>
/// <param name="topNumber">
/// Number of leading entries of each list that may nominate a sequence string
/// as a contig. The default, int.MaxValue, lets every entry nominate.
/// </param>
/// <returns>
/// The contigs ordered by ContigCount, largest first. Each contig holds all
/// entries (not only the leading ones) whose sequence string equals its
/// ContigSequence, and its ContigCount is the sum of their counts.
/// </returns>
public static List<SmallRNASequenceContig> BuildContigByIdenticalSequence(Dictionary<string, List<SmallRNASequence>> counts, int topNumber = int.MaxValue)
{
    // Entries that are allowed to nominate a sequence string as a contig.
    IEnumerable<SmallRNASequence> nominees;
    if (topNumber != int.MaxValue)
    {
        nominees = counts.Values.SelectMany(list => list.Take(Math.Min(list.Count, topNumber)).ToList());
    }
    else
    {
        nominees = counts.Values.SelectMany(list => list);
    }

    // Distinct sequence strings, most abundant (among the nominees) first.
    List<IGrouping<string, SmallRNASequence>> groups = nominees
        .GroupBy(entry => entry.Sequence)
        .OrderByDescending(grp => grp.Sum(entry => entry.Count))
        .ToList();

    // One empty contig per distinct sequence string.
    var contigBySequence = new Dictionary<string, SmallRNASequenceContig>();
    foreach (var grp in groups)
    {
        contigBySequence[grp.Key] = new SmallRNASequenceContig { ContigSequence = grp.Key };
    }

    // Attach every entry from every list to the contig of its sequence string;
    // entries whose string was never nominated are left out.
    foreach (var entry in counts.Values.SelectMany(list => list))
    {
        SmallRNASequenceContig owner;
        if (contigBySequence.TryGetValue(entry.Sequence, out owner))
        {
            owner.Sequences.Add(entry);
        }
    }

    // The contig count is the total over all attached entries.
    foreach (var contig in contigBySequence.Values)
    {
        contig.ContigCount = contig.Sequences.Sum(entry => entry.Count);
    }

    return contigBySequence.Values
        .OrderByDescending(contig => contig.ContigCount)
        .ToList();
}
/// <summary>
/// Derives a trimmed "mini" contig from each of the first
/// <paramref name="topNumber"/> contigs. For every contig the most abundant
/// distinct sequence is located inside the contig sequence; distinct sequences
/// that start fewer than 3 bases away from it and reach at least 10% of its
/// count are kept, and the mini contig spans from the smallest start to the
/// largest end of the kept sequences.
/// </summary>
/// <param name="contigs">Source contigs; only the first <paramref name="topNumber"/> are processed.</param>
/// <param name="topNumber">Maximum number of contigs to process.</param>
/// <returns>
/// One mini contig per processed contig, in input order. A contig is skipped
/// when it has no sequences or when none of its sequences occurs in its
/// contig sequence, so the result may be shorter than the number processed.
/// </returns>
public static List<SmallRNASequenceContig> BuildMiniContig(List<SmallRNASequenceContig> contigs, int topNumber)
{
    // A sequence is merged when its start offset differs from the dominant
    // sequence's offset by less than this many bases.
    const int MaximumOffsetDifference = 3;
    // ...and when its count is at least this fraction of the dominant count.
    const double MinimumCountFraction = 0.1;

    var result = new List<SmallRNASequenceContig>();
    for (int i = 0; i < contigs.Count && i < topNumber; i++)
    {
        var contig = contigs[i];

        // Ordinal search: the offsets are used for Substring/Length arithmetic,
        // which a culture-sensitive match does not guarantee to be consistent with.
        // Sequences that do not occur in the contig (index -1) are dropped here,
        // otherwise -1 could become the Substring start below.
        var seqCount = (from seq in contig.Sequences.GroupBy(l => l.Sequence)
                        let seqIndex = contig.ContigSequence.IndexOf(seq.Key, StringComparison.Ordinal)
                        where seqIndex >= 0
                        select new
                        {
                            Seq = seq.Key,
                            SeqCount = seq.Sum(l => l.Count),
                            SeqIndex = seqIndex
                        }).OrderByDescending(l => l.SeqCount).ToList();

        // Nothing to anchor a mini contig on: skip instead of failing on seqCount[0].
        if (seqCount.Count == 0)
        {
            continue;
        }

        // Dominant (most abundant) distinct sequence of this contig.
        var fq = seqCount[0];
        var list = seqCount
            .Where(l => Math.Abs(l.SeqIndex - fq.SeqIndex) < MaximumOffsetDifference
                        && l.SeqCount >= fq.SeqCount * MinimumCountFraction)
            .ToList();

        // Span covered by the kept sequences inside the contig sequence.
        var start = list.Min(l => l.SeqIndex);
        var end = list.Max(l => l.SeqIndex + l.Seq.Length);
        var contigSeq = contig.ContigSequence.Substring(start, end - start);

        var contigSequences = new HashSet<string>(list.ConvertAll(l => l.Seq));

        var miniContig = new SmallRNASequenceContig();
        miniContig.ContigSequence = contigSeq;
        miniContig.ContigCount = list.Sum(l => l.SeqCount);
        // Carry over the original entries whose sequence string was kept.
        miniContig.Sequences.AddRange(contig.Sequences.Where(l => contigSequences.Contains(l.Sequence)));

        result.Add(miniContig);
    }

    return result;
}