コード例 #1
0
        /// <summary>
        /// Builds a trimmed "mini" contig for each of the first <paramref name="topNumber"/> entries
        /// of <paramref name="contigs"/>. For every contig the most abundant distinct sequence is
        /// located inside the contig sequence, and only the distinct sequences that start close to it
        /// and are abundant enough relative to it are kept.
        /// </summary>
        /// <param name="contigs">Contigs to process, in the order they should be considered.</param>
        /// <param name="topNumber">Maximum number of leading contigs to process.</param>
        /// <returns>
        /// The mini contigs, in input order. A contig that has no sequences, or none of whose
        /// sequences can be located inside its contig sequence, produces no entry.
        /// </returns>
        public static List <SmallRNASequenceContig> BuildMiniContig(List <SmallRNASequenceContig> contigs, int topNumber)
        {
            // A sequence is kept when its start differs from the top sequence's start by less than this many bases...
            const int maxStartOffset = 3;
            // ...and its total count is at least this fraction of the top sequence's total count.
            const double minCountRatio = 0.1;

            var result = new List <SmallRNASequenceContig>();

            for (int i = 0; i < contigs.Count && i < topNumber; i++)
            {
                var contig = contigs[i];

                // Collapse identical sequences, sum their counts and locate each one in the contig.
                // Ordinal matching: these are sequence strings, not linguistic text.
                // Sequences that cannot be located (index -1) are dropped; keeping them would make
                // the Substring call below fail with a negative start.
                var seqCount = contig.Sequences
                               .GroupBy(l => l.Sequence)
                               .Select(g => new { Seq = g.Key, SeqCount = g.Sum(l => l.Count), SeqIndex = contig.ContigSequence.IndexOf(g.Key, StringComparison.Ordinal) })
                               .Where(l => l.SeqIndex >= 0)
                               .OrderByDescending(l => l.SeqCount)
                               .ToList();

                // Nothing usable in this contig: skip it instead of failing on seqCount[0].
                if (seqCount.Count == 0)
                {
                    continue;
                }

                var fq   = seqCount[0];
                var list = seqCount.Where(l => Math.Abs(l.SeqIndex - fq.SeqIndex) < maxStartOffset && l.SeqCount >= fq.SeqCount * minCountRatio).ToList();

                // The mini contig spans from the leftmost start to the rightmost end of the kept sequences.
                var start           = list.Min(l => l.SeqIndex);
                var end             = list.Max(l => l.SeqIndex + l.Seq.Length);
                var contigSeq       = contig.ContigSequence.Substring(start, end - start);
                var contigSequences = new HashSet <string>(list.ConvertAll(l => l.Seq));

                var miniContig = new SmallRNASequenceContig();
                miniContig.ContigSequence = contigSeq;
                miniContig.ContigCount    = list.Sum(l => l.SeqCount);
                miniContig.Sequences.AddRange(contig.Sequences.Where(l => contigSequences.Contains(l.Sequence)));
                result.Add(miniContig);
            }

            return(result);
        }
コード例 #2
0
        /// <summary>
        /// Creates one contig per distinct sequence string found in <paramref name="counts"/>,
        /// attaches every entry with that exact sequence string to it, and returns the contigs
        /// ordered by <c>ContigCount</c>, largest first.
        /// </summary>
        /// <param name="counts">
        /// Lists of small RNA sequences; presumably keyed by sample name (confirm against callers).
        /// </param>
        /// <param name="topNumber">
        /// When not <c>int.MaxValue</c>, only the first <paramref name="topNumber"/> entries of each
        /// list contribute candidate sequences. Entries beyond that are still attached to a contig
        /// when their sequence was selected through another list.
        /// </param>
        /// <returns>The contigs sorted by descending <c>ContigCount</c>.</returns>
        public static List <SmallRNASequenceContig> BuildContigByIdenticalSequence(Dictionary <string, List <SmallRNASequence> > counts, int topNumber = int.MaxValue)
        {
            bool takeEverything = topNumber == int.MaxValue;

            // Distinct candidate sequences, most abundant (summed over all contributing entries) first.
            var candidates = counts.Values
                             .SelectMany(perSample => takeEverything ? perSample : perSample.Take(Math.Min(perSample.Count, topNumber)))
                             .GroupBy(entry => entry.Sequence)
                             .OrderByDescending(grp => grp.Sum(entry => entry.Count))
                             .ToList();

            // Sequence -> contig lookup, filled in the abundance order established above.
            var contigBySequence = candidates.ToDictionary(
                grp => grp.Key,
                grp => new SmallRNASequenceContig { ContigSequence = grp.Key });

            // Attach every entry, from every list, whose sequence has a contig.
            foreach (var entry in counts.Values.SelectMany(perSample => perSample))
            {
                SmallRNASequenceContig owner;
                if (contigBySequence.TryGetValue(entry.Sequence, out owner))
                {
                    owner.Sequences.Add(entry);
                }
            }

            // Contig counts are filled in by the helper (presumably normalized using these totals; see its definition).
            Dictionary <string, int> sampleTotals = GetSampleCountMap(counts);

            CalculateNormalizedContigCount(contigBySequence.Values.ToList(), sampleTotals);

            return(contigBySequence.Values.OrderByDescending(c => c.ContigCount).ToList());
        }
コード例 #3
0
        /// <summary>
        /// Convenience overload: wraps each sequence in its own single-sequence contig (contig
        /// sequence and count copied from the sequence) and delegates to the overload that takes contigs.
        /// </summary>
        /// <param name="sequences">Sequences to seed the contigs with, one contig per entry.</param>
        /// <param name="minimumOverlapRate">Passed through unchanged to the contig-based overload.</param>
        /// <param name="maximumExtensionBase">Passed through unchanged to the contig-based overload.</param>
        /// <returns>Whatever the contig-based overload returns for the seeded contigs.</returns>
        public static List <SmallRNASequenceContig> GetTopContig(List <SmallRNASequence> sequences, double minimumOverlapRate, int maximumExtensionBase)
        {
            List <SmallRNASequenceContig> seeds = sequences.ConvertAll(source => new SmallRNASequenceContig
            {
                ContigSequence = source.Sequence,
                ContigCount    = source.Count,
                Sequences      = { source }
            });

            return(GetTopContig(seeds, minimumOverlapRate, maximumExtensionBase));
        }