/// <summary>
        /// Loads the regions listed in <c>CoordinateFile</c>, optionally attaches sequences read
        /// from <c>FastaFile</c>, and wraps each region in a <see cref="FeatureLocation"/> whose
        /// category is taken from <c>SmallRNAConsts.Biotypes</c>.
        /// </summary>
        /// <returns>One <see cref="FeatureLocation"/> per region, in the order they were read.</returns>
        public virtual List <FeatureLocation> GetSequenceRegions()
        {
            var regions = SequenceRegionUtils.GetSequenceRegions(CoordinateFile);

            // Chromosome names that start with "chr" are replaced by the text following "chr".
            foreach (var region in regions)
            {
                if (region.Seqname.StartsWith("chr"))
                {
                    region.Seqname = region.Seqname.StringAfter("chr");
                }
            }

            // Sequences are attached only when a FASTA file is configured, and only to regions
            // whose name starts with the miRNA or tRNA prefix; all other regions get an empty sequence.
            if (!string.IsNullOrEmpty(this.FastaFile))
            {
                Console.WriteLine("Reading sequence from {0} ...", this.FastaFile);
                var sequenceByName = SequenceUtils.Read(new FastaFormat(), this.FastaFile).ToDictionary(m => m.Name);

                foreach (var region in regions)
                {
                    var wantsSequence = region.Name.StartsWith(SmallRNAConsts.miRNA) || region.Name.StartsWith(SmallRNAConsts.tRNA);
                    if (!wantsSequence)
                    {
                        region.Sequence = string.Empty;
                        continue;
                    }

                    if (!sequenceByName.ContainsKey(region.Name))
                    {
                        // The region keeps whatever sequence it already had.
                        Console.WriteLine("Missing sequence: " + region.Name);
                        continue;
                    }

                    region.Sequence = sequenceByName[region.Name].SeqString;
                }

                sequenceByName.Clear();
            }

            var features = new List <FeatureLocation>(regions.Count);
            foreach (var region in regions)
            {
                var feature = new FeatureLocation(region);

                // No early exit: when several biotype prefixes match, the last one in the list wins.
                foreach (var biotype in SmallRNAConsts.Biotypes)
                {
                    if (feature.Name.StartsWith(biotype))
                    {
                        feature.Category = biotype;
                    }
                }

                features.Add(feature);
            }

            return features;
        }
Example #2
0
        /// <summary>
        /// Returns the mapped region whose location string equals <paramref name="loc"/>,
        /// creating it and adding it to <c>MappedRegions</c> when no such region exists yet.
        /// </summary>
        /// <param name="loc">Location string, compared against <c>Region.GetLocation()</c> of each known region.</param>
        /// <returns>The existing or newly created region.</returns>
        public MappedMirnaRegion FindOrCreateRegion(string loc)
        {
            foreach (var existing in MappedRegions)
            {
                if (!existing.Region.GetLocation().Equals(loc))
                {
                    continue;
                }

                return existing;
            }

            // Nothing matched: parse the location into a new region and remember it.
            var created = new MappedMirnaRegion
            {
                Region = SequenceRegionUtils.ParseLocation <SequenceRegion>(loc)
            };
            this.MappedRegions.Add(created);

            return created;
        }
        /// <summary>
        /// Reads the regions described by <c>CoordinateFile</c> (restricted by <c>GtfFeatureName</c>
        /// and <c>BedAsGtf</c>), fills in their sequences from the region names and/or from
        /// <c>FastaFile</c>, and returns them as feature locations.
        /// </summary>
        /// <returns>One <see cref="FeatureLocation"/> per region read from the coordinate file.</returns>
        public virtual List <FeatureLocation> GetSequenceRegions()
        {
            var regions = SequenceRegionUtils.GetSequenceRegions(CoordinateFile, GtfFeatureName, BedAsGtf);

            foreach (var region in regions)
            {
                region.Seqname = region.Seqname.StringAfter("chr");
            }

            // Region names may carry the sequence as "name:sequence". The first name containing ':'
            // is used as a probe: if everything after the ':' consists of characters found in MIRNA,
            // every region name is split into name and sequence.
            var probe = regions.FirstOrDefault(m => m.Name.Contains(":"));
            var sequenceInName = probe != null && probe.Name.StringAfter(":").All(m => MIRNA.Contains(m));
            if (sequenceInName)
            {
                // Two separate passes: all sequences are taken from the unsplit names first.
                foreach (var region in regions)
                {
                    region.Sequence = region.Name.StringAfter(":");
                }

                foreach (var region in regions)
                {
                    region.Name = region.Name.StringBefore(":");
                }
            }

            // Sequences from the FASTA file, when configured, overwrite whatever was set above.
            if (!string.IsNullOrEmpty(this.FastaFile))
            {
                Console.WriteLine("Reading sequence from {0} ...", this.FastaFile);
                var sequenceByName = SequenceUtils.Read(new FastaFormat(), this.FastaFile).ToDictionary(m => m.Name);

                foreach (var region in regions)
                {
                    if (!sequenceByName.ContainsKey(region.Name))
                    {
                        Console.WriteLine("Missing sequence: " + region.Name);
                        continue;
                    }

                    region.Sequence = sequenceByName[region.Name].SeqString;
                }

                sequenceByName.Clear();
            }

            var features = regions.ConvertAll(m => new FeatureLocation(m));
            return features.ToList();
        }
        /// <summary>
        /// Annotates every row of the comma-separated input file with the closest feature(s)
        /// found in the configured coordinate files and writes the result to
        /// <c>_options.OutputFile</c>.
        /// </summary>
        /// <remarks>
        /// Input rows are expected to hold the chromosome in column 0, start in column 2 and end in
        /// column 3; columns 5 and 7 are also read. Three columns are appended: the names of the
        /// closest features, their locations (both joined with ';') and the distance (0 when a
        /// feature overlaps the row). Rows whose chromosome has no feature get three empty columns.
        /// </remarks>
        /// <returns>A single-element sequence containing the output file name.</returns>
        public override IEnumerable <string> Process()
        {
            List <GtfItem> items = new List <GtfItem>();

            foreach (var corfile in _options.CoordinateFiles)
            {
                var curitems = SequenceRegionUtils.GetSequenceRegions(corfile);

                // Filter and rename the entries just read from this file. These steps used to run on
                // the accumulated list, so the last file was never filtered or renamed.
                if (_options.Features != null && _options.Features.Count > 0)
                {
                    curitems.RemoveAll(m => !_options.Features.Contains(m.Feature));
                }

                if (!string.IsNullOrEmpty(_options.NameKey))
                {
                    // Take the feature name from the "<NameKey>=value;" attribute when present.
                    var key = _options.NameKey + "=";
                    curitems.ForEach(m =>
                    {
                        if (!string.IsNullOrEmpty(m.Attributes) && m.Attributes.Contains(key))
                        {
                            m.Name = m.Attributes.StringAfter(key).StringBefore(";").Trim();
                        }
                    });
                }

                items.AddRange(curitems);
            }

            // When not every feature chromosome starts with "chr", StringAfter("chr") is applied to
            // both the feature chromosomes and the input chromosomes so the two sides can match.
            var removechr = !items.All(m => m.Seqname.StartsWith("chr"));

            if (removechr)
            {
                items.ForEach(m => m.Seqname = m.Seqname.StringAfter("chr"));
            }

            var map = items.GroupBy(m => m.Seqname).ToDictionary(m => m.Key, m => m.ToList());

            using (StreamWriter sw = new StreamWriter(_options.OutputFile))
            {
                using (var sr = new StreamReader(_options.InputFile))
                {
                    string line;
                    var    headers = sr.ReadLine().Split(',').ToList();
                    headers.Add("ClosetFeature");
                    headers.Add("ClosetFeatureLocus");
                    headers.Add("ClosetFeatureDistance");

                    sw.WriteLine(headers.Merge(','));

                    List <GtfItem> mingtfs = new List <GtfItem>();
                    while ((line = sr.ReadLine()) != null)
                    {
                        var parts = line.Split(',');
                        var chr   = parts[0];
                        if (removechr)
                        {
                            chr = chr.StringAfter("chr");
                        }
                        var start = long.Parse(parts[2]);
                        var end   = long.Parse(parts[3]);

                        // Not used below; still read so that rows with missing or malformed
                        // columns 5/7 keep failing exactly as before.
                        var sequence = parts[5];
                        var location = int.Parse(parts[7]);

                        List <GtfItem> gtfs;
                        if (!map.TryGetValue(chr, out gtfs))
                        {
                            sw.WriteLine("{0},,,", line);
                            continue;
                        }

                        // Seed with the largest long so any real distance replaces it.
                        long mindist = long.MaxValue;
                        mingtfs.Clear();

                        foreach (var gtf in gtfs)
                        {
                            long dist;
                            if (gtf.Start > end)
                            {
                                // Feature lies entirely after the row.
                                dist = gtf.Start - end;
                            }
                            else if (gtf.End < start)
                            {
                                // Feature lies entirely before the row.
                                dist = start - gtf.End;
                            }
                            else
                            {
                                // Overlap.
                                dist = 0;
                            }

                            if (dist < mindist)
                            {
                                mingtfs.Clear();
                                mingtfs.Add(gtf);
                                mindist = dist;
                            }
                            else if (dist == mindist)
                            {
                                // Ties are all reported.
                                mingtfs.Add(gtf);
                            }
                        }

                        sw.WriteLine("{0},{1},{2},{3}",
                                     line,
                                     (from m in mingtfs select m.Name).Merge(";"),
                                     (from m in mingtfs select m.GetLocation()).Merge(";"),
                                     mindist);
                    }
                }
            }

            return new string[] { _options.OutputFile };
        }
Example #5
0
        /// <summary>
        /// Reads aligned reads line by line from <c>options.InputFile</c>, accumulates them in a
        /// <c>PileupCountList</c>, and writes one VCF-style record to <c>options.OutputFile</c> for
        /// every finished position that passes the Fisher exact test and the minimum
        /// alternative-allele threshold. When <c>options.ExportIgvScript</c> is set, an IGV batch
        /// script is written alongside to <c>options.OutputFile + ".igv"</c>.
        /// </summary>
        /// <returns>A single-element array containing <c>options.OutputFile</c>.</returns>
        /// <exception cref="UserTerminatedException">
        /// Thrown when <c>Progress.IsCancellationPending()</c> reports a cancellation (checked every 100 lines).
        /// </exception>
        public override IEnumerable <string> Process()
        {
            PileupCountList pc = new PileupCountList();

            var format = options.GetSAMFormat();

            var cm = new SmallRNACountMap(options.CountFile);

            // "miRNA" is passed as the second argument of GetSequenceRegions
            // (presumably a feature-name filter; confirm against SequenceRegionUtils).
            var srItems = SequenceRegionUtils.GetSequenceRegions(options.CoordinateFile, "miRNA", options.BedAsGtf);

            srItems.ForEach(m =>
            {
                m.Seqname = m.Seqname.StringAfter("chr");
            });
            // Regions grouped by chromosome name for the per-position lookup below.
            var srmap = srItems.GroupBy(m => m.Seqname).ToDictionary(m => m.Key, m => m.ToList());

            // Optional IGV script writer; closed in the finally block.
            StreamWriter swScript = null;

            try
            {
                if (options.ExportIgvScript)
                {
                    swScript = new StreamWriter(options.OutputFile + ".igv");
                    swScript.WriteLine("snapshotDirectory {0}", Path.GetDirectoryName(options.OutputFile).Replace('\\', '/'));
                }

                using (StreamWriter sw = new StreamWriter(options.OutputFile))
                {
                    // VCF header; {0}=date, {1}=source name, {2}/{3}=filter thresholds, {4}=sample column name.
                    sw.WriteLine(@"##fileformat=VCFv4.2
##fileDate={0:yyyyMMdd}
##source={1}
##phasing=partial
##INFO=<ID=NS,Number=1,Type=Integer,Description=""Number of Samples With Data"">
##INFO=<ID=DP,Number=1,Type=Integer,Description=""Total Depth"">
##INFO=<ID=AF,Number=A,Type=Float,Description=""Allele Frequency"">
##INFO=<ID=FP,Number=1,Type=Float,Description=""Fisher Exact Test P-Value"">
##INFO=<ID=MN,Number=.,Type=String,Description=""miRNA name contains this position"">
##FILTER=<ID=FisherET,Description=""Fisher exact test Pvalue less than {2}"">
##FILTER=<ID=AltAlleFreq,Description=""Alternative allele frequency less than {3}"">
##FILTER=<ID=notMiRNA,Description=""Position not located in miRNA locus"">
##FORMAT=<ID=DP,Number=1,Type=Integer,Description=""Read Depth"">
##FORMAT=<ID=AD,Number=1,Type=Integer,Description=""Allelic Depth"">
#CHROM  POS ID  REF ALT QUAL  FILTER  INFO  FORMAT  {4}",
                                 DateTime.Now,
                                 "PileupCountBuilder",
                                 options.FisherPValue,
                                 options.MinimumAlternativeAlleleFrequency,
                                 Path.GetFileNameWithoutExtension(options.InputFile));

                    using (var sr = SAMFactory.GetReader(options.InputFile, true))
                    {
                        int    count = 0;
                        string line;
                        while ((line = sr.ReadLine()) != null)
                        {
                            count++;

                            // Poll for cancellation every 100 lines.
                            if (count % 100 == 0)
                            {
                                if (Progress.IsCancellationPending())
                                {
                                    throw new UserTerminatedException();
                                }
                            }

                            if (count % 100000 == 0)
                            {
                                Progress.SetMessage("{0} reads processed", count);
                            }

                            var parts = line.Split('\t');

                            var qname = parts[SAMFormatConst.QNAME_INDEX];
                            var seq   = parts[SAMFormatConst.SEQ_INDEX];

                            //too short
                            if (seq.Length < options.MinimumReadLength)
                            {
                                continue;
                            }

                            SAMFlags flag = (SAMFlags)int.Parse(parts[SAMFormatConst.FLAG_INDEX]);
                            //unmatched
                            if (flag.HasFlag(SAMFlags.UnmappedQuery))
                            {
                                continue;
                            }

                            var cigar = parts[SAMFormatConst.CIGAR_INDEX];
                            //insertion/deletion
                            if (cigar.Any(m => m == 'I' || m == 'D'))
                            {
                                continue;
                            }

                            var sam = new SAMAlignedItem()
                            {
                                Qname = qname,
                            };

                            // Reverse-strand reads get the reverse complement as sam.Sequence;
                            // the location below always receives the sequence as read from the line.
                            bool isReversed = flag.HasFlag(SAMFlags.QueryOnReverseStrand);
                            char strand;
                            if (isReversed)
                            {
                                strand       = '-';
                                sam.Sequence = SequenceUtils.GetReverseComplementedSequence(seq);
                            }
                            else
                            {
                                strand       = '+';
                                sam.Sequence = seq;
                            }

                            var loc = new SAMAlignedLocation(sam)
                            {
                                Seqname           = parts[SAMFormatConst.RNAME_INDEX].StringAfter("chr"),
                                Start             = int.Parse(parts[SAMFormatConst.POS_INDEX]),
                                Strand            = strand,
                                Cigar             = parts[SAMFormatConst.CIGAR_INDEX],
                                MismatchPositions = format.GetMismatchPositions(parts),
                                NumberOfMismatch  = format.GetNumberOfMismatch(parts),
                                Sequence          = seq
                            };

                            loc.ParseEnd(sam.Sequence);
                            sam.AddLocation(loc);

                            if (format.HasAlternativeHits)
                            {
                                format.ParseAlternativeHits(parts, sam);
                            }

                            // Add the read, weighted by the count looked up for its query name.
                            // The return value holds the positions to report now (presumably those
                            // that can no longer change; confirm against PileupCountList.Add).
                            var finished = pc.Add(sam, cm.GetCount(sam.Qname));
                            if (null == finished || 0 == finished.Count)
                            {
                                continue;
                            }

                            foreach (var fin in finished)
                            {
                                //if (fin.Chromosome.Equals("1") && fin.Position == 5160725)
                                //{
                                //  Console.WriteLine(fin);
                                //}
                                var ft = fin.FisherExactTest();
                                if (ft.PValue <= options.FisherPValue)
                                {
                                    var total     = fin.Sum(m => m.Value);
                                    var minallele = total * options.MinimumAlternativeAlleleFrequency;
                                    if (ft.Sample2.Failed >= minallele)
                                    {
                                        List <GtfItem> srs;
                                        List <string>  ranges = new List <string>();

                                        // NOTE(review): the lookup key is the chromosome of the read just
                                        // added, not fin.Chromosome; confirm the two always agree for
                                        // positions returned by pc.Add.
                                        if (srmap.TryGetValue(sam.Locations[0].Seqname, out srs))
                                        {
                                            foreach (var seqr in srs)
                                            {
                                                if (seqr.Contains(fin.Position))
                                                {
                                                    ranges.Add(seqr.GetNameLocation());
                                                }
                                            }
                                        }

                                        // Every entry whose key differs from the reference, ordered by key.
                                        var alter = (from r in fin
                                                     where r.Key != fin.Reference
                                                     orderby r.Key
                                                     select r).ToList();

                                        // One record line: QUAL is written as 0, NS as 1, and FILTER is
                                        // "notMiRNA" when no region contains the position, "PASS" otherwise.
                                        var str = string.Format("{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}\tNS={7};DP={8};AF={9};FP={10:0.##E0}{11}\tDP:AD\t{12}:{13},{14}",
                                                                fin.Chromosome,
                                                                fin.Position,
                                                                ".",
                                                                fin.Reference,
                                                                (from r in alter
                                                                 select r.Key.ToString()).Merge(","),
                                                                0,
                                                                ranges.Count == 0 ? "notMiRNA" : "PASS",
                                                                1,
                                                                total,
                                                                (from r in alter
                                                                 select string.Format("{0:0.###}", r.Value * 1.0 / total)).Merge(","),
                                                                ft.PValue,
                                                                ranges.Count == 0 ? "" : ";" + ranges.Merge(","),
                                                                total,
                                                                ft.Sample2.Succeed,
                                                                (from r in alter
                                                                 select r.Value.ToString()).Merge(","));

                                        sw.WriteLine(str);
                                        //Console.WriteLine(str);

                                        // IGV snapshot commands are emitted only for positions inside a region;
                                        // the first region's name (with '(', ')' and ':' replaced by '_') goes into the file name.
                                        if (swScript != null && ranges.Count > 0)
                                        {
                                            swScript.WriteLine(@"goto {0}:{1}
sort position
snapshot {0}_{2}_{1}.png", fin.Chromosome, fin.Position, ranges[0].Replace('(', '_').Replace(')', '_').Replace(':', '_'));
                                        }
                                    }
                                }
                            }

                            finished.Clear();
                        }
                    }
                }
            }
            finally
            {
                if (swScript != null)
                {
                    swScript.Close();
                }
            }
            return(new string[] { options.OutputFile });
        }
Example #6
0
        /// <summary>
        /// Extracts the sequence of every accepted region listed in <c>options.InputFile</c> from
        /// the genome FASTA file and writes one FASTA entry per region to <c>options.OutputFile</c>.
        /// </summary>
        /// <remarks>
        /// Regions on the '-' strand are reverse-complemented, and all extracted sequences are
        /// upper-cased. When a chromosome from the FASTA file has no regions under its own name,
        /// the mitochondrial aliases M/MT (and chrM/chrMT) are tried as a fallback.
        /// </remarks>
        /// <returns>A single-element array containing <c>options.OutputFile</c>.</returns>
        /// <exception cref="Exception">
        /// Thrown when a region extends beyond the end of its chromosome sequence.
        /// </exception>
        public override IEnumerable <string> Process()
        {
            var srItems = SequenceRegionUtils.GetSequenceRegions(options.InputFile).Where(m => options.AcceptName(m.Name)).ToList();

            // Keep only the first region for each distinct name.
            srItems = (from sr in srItems.GroupBy(m => m.Name)
                       select sr.First()).ToList();

            // NOTE(review): this tests the region Name, not Seqname, for a "chr" prefix,
            // while the stripping below acts on Seqname; confirm which one is intended.
            var keepChrInName = options.KeepChrInName && srItems.Any(m => m.Name.StartsWith("chr"));

            if (!keepChrInName)
            {
                srItems.ForEach(m => m.Seqname = m.Seqname.StringAfter("chr"));
            }

            var srMap = srItems.ToGroupDictionary(m => m.Seqname);

            var ff = new FastaFormat(int.MaxValue);

            using (StreamWriter sw = new StreamWriter(options.OutputFile))
            {
                using (StreamReader sr = new StreamReader(options.GenomeFastaFile))
                {
                    Sequence seq;
                    while ((seq = ff.ReadSequence(sr)) != null)
                    {
                        Progress.SetMessage("processing " + seq.Name + " ...");
                        var name = seq.Name;
                        if (!keepChrInName)
                        {
                            name = name.StringAfter("chr");
                        }

                        List <GtfItem> items;

                        if (!srMap.TryGetValue(name, out items))
                        {
                            // Fall back to the other spelling of the mitochondrial chromosome.
                            if (name.Equals("M"))
                            {
                                name = "MT";
                                srMap.TryGetValue(name, out items);
                            }
                            else if (name.Equals("chrM"))
                            {
                                name = "chrMT";
                                srMap.TryGetValue(name, out items);
                            }
                            else if (name.Equals("MT"))
                            {
                                name = "M";
                                srMap.TryGetValue(name, out items);
                            }
                            else if (name.Equals("chrMT"))
                            {
                                name = "chrM";
                                srMap.TryGetValue(name, out items);
                            }
                        }

                        if (items != null)
                        {
                            Progress.SetMessage("  there are {0} entries in {1} ...", items.Count, name);

                            var chromosomeLength = seq.SeqString.Length;
                            foreach (var item in items)
                            {
                                // Substring(Start - 1, Length) is valid while Start - 1 + Length does not
                                // exceed the sequence length, so a region ending exactly on the last
                                // base is accepted; only regions running past the end are rejected.
                                if (item.Start - 1 + item.Length > chromosomeLength)
                                {
                                    throw new Exception(string.Format("{0} exceed chromosome {1} length {2}", item, name, chromosomeLength));
                                }

                                var newseq = seq.SeqString.Substring((int)item.Start - 1, (int)item.Length);
                                if (item.Strand == '-')
                                {
                                    newseq = SequenceUtils.GetReverseComplementedSequence(newseq);
                                }
                                newseq = newseq.ToUpper();

                                // Entry header: "<name> <location without strand> <strand>".
                                var newname = string.Format("{0} {1} {2}", item.Name, item.GetLocationWithoutStrand(), item.Strand);
                                var entry   = new Sequence(newname, newseq);

                                ff.WriteSequence(sw, entry);
                            }
                        }
                    }
                }
            }

            return new string[] { options.OutputFile };
        }