/// <summary>
/// Adds the aligned bases of an alignment to the per-position counts, leaving out bases whose
/// quality is below the requested minimum.
/// </summary>
/// <param name="item">Alignment whose aligned sequence and score string are read through GetSequenceScore.</param>
/// <param name="count">Amount added to the count of every accepted base.</param>
/// <param name="minScore">Minimum accepted quality, compared against the score character code minus 33; quality is not checked when this is zero or negative.</param>
/// <returns>Number of bases that were skipped because their quality was below <paramref name="minScore"/>.</returns>
public int AddByScore(SAMAlignedItem item, int count, int minScore)
        {
            string bases, qualities;

            item.GetSequenceScore(out bases, out qualities);

            bool checkQuality = minScore > 0;
            int  skipped      = 0;

            for (int offset = 0; offset < bases.Length; offset++)
            {
                // The quality value is the score character code shifted down by 33.
                if (checkQuality && (int)(qualities[offset]) - 33 < minScore)
                {
                    skipped++;
                    continue;
                }

                var symbol = bases[offset];
                var counts = Count[(int)(item.Pos) + offset];
                int previous;

                // Start from the given count on first sight of the base, otherwise accumulate.
                counts[symbol] = counts.TryGetValue(symbol, out previous) ? previous + count : count;
            }

            return skipped;
        }
        /// <summary>
        /// Adds the aligned bases of an alignment to the pending per-position counts and hands back
        /// the pending positions that were released by this call.
        /// </summary>
        /// <param name="item">Alignment to add; its first location supplies the chromosome name and start.</param>
        /// <param name="count">Amount added to the count of each aligned base.</param>
        /// <returns>
        /// The whole pending list when the alignment is on a different chromosome or starts after the
        /// last pending position; the leading pending positions in front of the alignment when there
        /// are any; otherwise null.
        /// </returns>
        public List <PileupCount> Add(SAMAlignedItem item, int count)
        {
            List <PileupCount> released = null;

            bool chromosomeChanged = !item.Locations[0].Seqname.Equals(this.Chromosome);
            bool hasPending        = !chromosomeChanged && this.Position != -1;

            if (chromosomeChanged || (hasPending && item.Pos > this.Count.Last().Position))
            {
                // Nothing pending can receive further bases: release all of it and start over.
                released = Count;
                Count    = new List <PileupCount>();
            }
            else if (hasPending)
            {
                // Entries in front of the new alignment start are released and dropped from the pending list.
                int passed = (int)(item.Pos - this.Position);
                if (passed > 0)
                {
                    released = Count.Take(passed).ToList();
                    Count.RemoveRange(0, passed);
                }
            }

            string bases, reference;

            item.GetSequences(out bases, out reference);

            // Grow the pending list until it has one entry per aligned base.
            while (Count.Count < bases.Length)
            {
                int offset = Count.Count;
                Count.Add(new PileupCount()
                {
                    Chromosome = item.Locations[0].Seqname,
                    Position   = item.Locations[0].Start + offset,
                    Reference  = reference[offset]
                });
            }

            int index = 0;
            foreach (var symbol in bases)
            {
                var counts = Count[index];
                int previous;
                counts[symbol] = counts.TryGetValue(symbol, out previous) ? previous + count : count;
                index++;
            }

            return released;
        }
        /// <summary>
        /// Adds every aligned base of an alignment to the count entry at the alignment position plus
        /// the base offset.
        /// </summary>
        /// <param name="item">Alignment whose aligned sequence is read through GetSequences.</param>
        /// <param name="count">Amount added to the count of each aligned base.</param>
        public void Add(SAMAlignedItem item, int count)
        {
            string bases, reference;

            item.GetSequences(out bases, out reference);

            int offset = 0;
            foreach (var symbol in bases)
            {
                var counts = Count[(int)(item.Pos) + offset];
                int previous;

                // Start from the given count on first sight of the base, otherwise accumulate.
                counts[symbol] = counts.TryGetValue(symbol, out previous) ? previous + count : count;
                offset++;
            }
        }
        /// <summary>
        /// Reads tab-separated lines from the underlying file until one describes a mapped read
        /// whose mapping quality reaches the configured minimum, and converts it to an alignment item.
        /// </summary>
        /// <returns>The next accepted alignment, or null once the file has no more lines.</returns>
        public SAMAlignedItem NextSAMAlignedItem()
        {
            for (string line = _file.ReadLine(); line != null; line = _file.ReadLine())
            {
                var fields = line.Split('\t');

                var readName    = fields[SAMFormatConst.QNAME_INDEX];
                var rawSequence = fields[SAMFormatConst.SEQ_INDEX];

                // Skip reads flagged as unmapped.
                var flags = (SAMFlags)int.Parse(fields[SAMFormatConst.FLAG_INDEX]);
                if (flags.HasFlag(SAMFlags.UnmappedQuery))
                {
                    continue;
                }

                // Skip reads whose mapping quality is below the configured minimum.
                if (int.Parse(fields[SAMFormatConst.MAPQ_INDEX]) < _options.MinimumReadQuality)
                {
                    continue;
                }

                var aligned = new SAMAlignedItem
                {
                    Qname = readName,
                };

                // Reverse-strand reads store the reverse complement on the item; the location keeps the raw sequence.
                bool onReverseStrand = flags.HasFlag(SAMFlags.QueryOnReverseStrand);
                char strandSymbol    = onReverseStrand ? '-' : '+';
                aligned.Sequence = onReverseStrand
                                   ? SequenceUtils.GetReverseComplementedSequence(rawSequence)
                                   : rawSequence;

                var location = new SAMAlignedLocation(aligned)
                {
                    Seqname           = fields[SAMFormatConst.RNAME_INDEX],
                    Start             = int.Parse(fields[SAMFormatConst.POS_INDEX]),
                    Strand            = strandSymbol,
                    Cigar             = fields[SAMFormatConst.CIGAR_INDEX],
                    MismatchPositions = _format.GetMismatchPositions(fields),
                    NumberOfMismatch  = _format.GetNumberOfMismatch(fields),
                    Sequence          = rawSequence,
                    Qual              = fields[SAMFormatConst.QUAL_INDEX]
                };

                location.ParseEnd(aligned.Sequence);
                aligned.AddLocation(location);

                if (_format.HasAlternativeHits)
                {
                    _format.ParseAlternativeHits(fields, aligned);
                }

                return aligned;
            }

            return null;
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Reads the input alignment file, piles up base counts per position and writes every
        /// released position that passes the Fisher exact test and the alternative allele frequency
        /// threshold as a VCF-style line to the output file. Optionally writes an IGV batch script
        /// for positions located in miRNA regions.
        /// </summary>
        /// <returns>A single-element sequence holding the output file name.</returns>
        /// <exception cref="UserTerminatedException">Thrown when cancellation is pending (checked every 100 lines).</exception>
        public override IEnumerable <string> Process()
        {
            PileupCountList pc = new PileupCountList();

            var format = options.GetSAMFormat();

            var cm = new SmallRNACountMap(options.CountFile);

            var srItems = SequenceRegionUtils.GetSequenceRegions(options.CoordinateFile, "miRNA", options.BedAsGtf);

            // Region names and read names are both stripped of the "chr" prefix so they can be matched.
            srItems.ForEach(m =>
            {
                m.Seqname = m.Seqname.StringAfter("chr");
            });
            var srmap = srItems.GroupBy(m => m.Seqname).ToDictionary(m => m.Key, m => m.ToList());

            StreamWriter swScript = null;

            try
            {
                if (options.ExportIgvScript)
                {
                    swScript = new StreamWriter(options.OutputFile + ".igv");
                    swScript.WriteLine("snapshotDirectory {0}", Path.GetDirectoryName(options.OutputFile).Replace('\\', '/'));
                }

                using (StreamWriter sw = new StreamWriter(options.OutputFile))
                {
                    sw.WriteLine(@"##fileformat=VCFv4.2
##fileDate={0:yyyyMMdd}
##source={1}
##phasing=partial
##INFO=<ID=NS,Number=1,Type=Integer,Description=""Number of Samples With Data"">
##INFO=<ID=DP,Number=1,Type=Integer,Description=""Total Depth"">
##INFO=<ID=AF,Number=A,Type=Float,Description=""Allele Frequency"">
##INFO=<ID=FP,Number=1,Type=Float,Description=""Fisher Exact Test P-Value"">
##INFO=<ID=MN,Number=.,Type=String,Description=""miRNA name contains this position"">
##FILTER=<ID=FisherET,Description=""Fisher exact test Pvalue less than {2}"">
##FILTER=<ID=AltAlleFreq,Description=""Alternative allele frequency less than {3}"">
##FILTER=<ID=notMiRNA,Description=""Position not located in miRNA locus"">
##FORMAT=<ID=DP,Number=1,Type=Integer,Description=""Read Depth"">
##FORMAT=<ID=AD,Number=1,Type=Integer,Description=""Allelic Depth"">
#CHROM  POS ID  REF ALT QUAL  FILTER  INFO  FORMAT  {4}",
                                 DateTime.Now,
                                 "PileupCountBuilder",
                                 options.FisherPValue,
                                 options.MinimumAlternativeAlleleFrequency,
                                 Path.GetFileNameWithoutExtension(options.InputFile));

                    using (var sr = SAMFactory.GetReader(options.InputFile, true))
                    {
                        int    count = 0;
                        string line;
                        while ((line = sr.ReadLine()) != null)
                        {
                            count++;

                            if (count % 100 == 0)
                            {
                                if (Progress.IsCancellationPending())
                                {
                                    throw new UserTerminatedException();
                                }
                            }

                            if (count % 100000 == 0)
                            {
                                Progress.SetMessage("{0} reads processed", count);
                            }

                            var parts = line.Split('\t');

                            var qname = parts[SAMFormatConst.QNAME_INDEX];
                            var seq   = parts[SAMFormatConst.SEQ_INDEX];

                            //too short
                            if (seq.Length < options.MinimumReadLength)
                            {
                                continue;
                            }

                            SAMFlags flag = (SAMFlags)int.Parse(parts[SAMFormatConst.FLAG_INDEX]);
                            //unmatched
                            if (flag.HasFlag(SAMFlags.UnmappedQuery))
                            {
                                continue;
                            }

                            var cigar = parts[SAMFormatConst.CIGAR_INDEX];
                            //insertion/deletion
                            if (cigar.Any(m => m == 'I' || m == 'D'))
                            {
                                continue;
                            }

                            var sam = new SAMAlignedItem()
                            {
                                Qname = qname,
                            };

                            bool isReversed = flag.HasFlag(SAMFlags.QueryOnReverseStrand);
                            char strand;
                            if (isReversed)
                            {
                                strand       = '-';
                                sam.Sequence = SequenceUtils.GetReverseComplementedSequence(seq);
                            }
                            else
                            {
                                strand       = '+';
                                sam.Sequence = seq;
                            }

                            var loc = new SAMAlignedLocation(sam)
                            {
                                Seqname           = parts[SAMFormatConst.RNAME_INDEX].StringAfter("chr"),
                                Start             = int.Parse(parts[SAMFormatConst.POS_INDEX]),
                                Strand            = strand,
                                Cigar             = cigar,
                                MismatchPositions = format.GetMismatchPositions(parts),
                                NumberOfMismatch  = format.GetNumberOfMismatch(parts),
                                Sequence          = seq
                            };

                            loc.ParseEnd(sam.Sequence);
                            sam.AddLocation(loc);

                            if (format.HasAlternativeHits)
                            {
                                format.ParseAlternativeHits(parts, sam);
                            }

                            var finished = pc.Add(sam, cm.GetCount(sam.Qname));
                            if (null == finished || 0 == finished.Count)
                            {
                                continue;
                            }

                            foreach (var fin in finished)
                            {
                                var ft = fin.FisherExactTest();
                                if (ft.PValue <= options.FisherPValue)
                                {
                                    var total     = fin.Sum(m => m.Value);
                                    var minallele = total * options.MinimumAlternativeAlleleFrequency;
                                    if (ft.Sample2.Failed >= minallele)
                                    {
                                        List <GtfItem> srs;
                                        List <string>  ranges = new List <string>();

                                        // Look regions up by the chromosome of the released position, not by the
                                        // chromosome of the read that triggered the release: the positions handed
                                        // back for the first read of a new chromosome belong to the previous one.
                                        if (srmap.TryGetValue(fin.Chromosome, out srs))
                                        {
                                            foreach (var seqr in srs)
                                            {
                                                if (seqr.Contains(fin.Position))
                                                {
                                                    ranges.Add(seqr.GetNameLocation());
                                                }
                                            }
                                        }

                                        // Every observed base other than the reference, in a stable order.
                                        var alter = (from r in fin
                                                     where r.Key != fin.Reference
                                                     orderby r.Key
                                                     select r).ToList();

                                        var str = string.Format("{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}\tNS={7};DP={8};AF={9};FP={10:0.##E0}{11}\tDP:AD\t{12}:{13},{14}",
                                                                fin.Chromosome,
                                                                fin.Position,
                                                                ".",
                                                                fin.Reference,
                                                                (from r in alter
                                                                 select r.Key.ToString()).Merge(","),
                                                                0,
                                                                ranges.Count == 0 ? "notMiRNA" : "PASS",
                                                                1,
                                                                total,
                                                                (from r in alter
                                                                 select string.Format("{0:0.###}", r.Value * 1.0 / total)).Merge(","),
                                                                ft.PValue,
                                                                ranges.Count == 0 ? "" : ";" + ranges.Merge(","),
                                                                total,
                                                                ft.Sample2.Succeed,
                                                                (from r in alter
                                                                 select r.Value.ToString()).Merge(","));

                                        sw.WriteLine(str);

                                        if (swScript != null && ranges.Count > 0)
                                        {
                                            swScript.WriteLine(@"goto {0}:{1}
sort position
snapshot {0}_{2}_{1}.png", fin.Chromosome, fin.Position, ranges[0].Replace('(', '_').Replace(')', '_').Replace(':', '_'));
                                        }
                                    }
                                }
                            }

                            finished.Clear();
                        }

                        // NOTE(review): nothing is read back from pc after the last line, so positions it
                        // may still be holding at end of input are presumably never written - confirm.
                    }
                }
            }
            finally
            {
                if (swScript != null)
                {
                    swScript.Close();
                }
            }
            return(new string[] { options.OutputFile });
        }
 /// <summary>
 /// Returns the sequence of an alignment as-is for the '+' strand, and its reverse complement
 /// for any other strand value.
 /// </summary>
 /// <param name="gstrand">Strand symbol; only '+' selects the unmodified sequence.</param>
 /// <param name="m">Alignment whose sequence is returned.</param>
 /// <returns>The sequence oriented according to <paramref name="gstrand"/>.</returns>
 private static string GetSequence(char gstrand, SAMAlignedItem m)
 {
     if (gstrand == '+')
     {
         return m.Sequence;
     }

     return SequenceUtils.GetReverseComplementedSequence(m.Sequence);
 }
        /// <summary>
        /// Registers every aligned position of an alignment and returns the pending positions that
        /// were completed by its arrival.
        /// </summary>
        /// <param name="item">alignment result to register</param>
        /// <returns>
        /// All pending positions when the alignment is on another chromosome or starts after the last
        /// pending position; otherwise the leading pending positions located before the alignment
        /// start (possibly an empty list); null when the chromosome matches and the current position is -1.
        /// </returns>
        public List <AlignedPositionMap> Add(SAMAlignedItem item)
        {
            List <AlignedPositionMap> completed = null;

            bool chromosomeChanged = !item.Locations[0].Seqname.Equals(this.Chromosome);
            bool hasPending        = !chromosomeChanged && this.Position != -1;

            if (chromosomeChanged || (hasPending && item.Pos > this.Positions.Last().Position))
            {
                // Either a new chromosome or a start beyond every pending position:
                // everything pending is complete, so hand it over and start with empty containers.
                completed   = Positions;
                Positions   = new List <AlignedPositionMap>();
                PositionMap = new Dictionary <long, AlignedPositionMap>();
            }
            else if (hasPending)
            {
                // Move the leading entries located before the alignment start into the completed list.
                completed = new List <AlignedPositionMap>();
                while (Positions[0].Position < item.Pos)
                {
                    var head = Positions[0];
                    completed.Add(head);
                    PositionMap.Remove(head.Position);
                    Positions.RemoveAt(0);
                }
            }

            foreach (var aligned in item.GetAlignedPositions())
            {
                // Find the entry of this position, creating and indexing it on first use.
                AlignedPositionMap slot;
                if (!PositionMap.TryGetValue(aligned.Position, out slot))
                {
                    slot = new AlignedPositionMap()
                    {
                        Chromosome = item.Locations[0].Seqname,
                        Position   = aligned.Position
                    };
                    Positions.Add(slot);
                    PositionMap[slot.Position] = slot;
                }

                // Group the aligned positions of one entry by their aligned event.
                List <AlignedPosition> sameEvent;
                if (!slot.TryGetValue(aligned.AlignedEvent, out sameEvent))
                {
                    sameEvent = new List <AlignedPosition>();
                    slot[aligned.AlignedEvent] = sameEvent;
                }
                sameEvent.Add(aligned);
            }

            return completed;
        }