/// <summary>
        /// Decides whether a BAM file contains paired reads by scanning its leading records.
        /// </summary>
        /// <param name="bamFile">Path of the BAM file to inspect.</param>
        /// <returns>
        /// <c>true</c> as soon as a scanned record has <see cref="SAMFlags.PairedRead"/> set or a
        /// query name ending in "/1" or "/2"; <c>false</c> when the file ends, or more than
        /// 100000 records have been scanned, without finding such a record.
        /// </returns>
        public static bool IsPaired(string bamFile)
        {
            // Scanning stops once the number of inspected records exceeds this limit.
            const int maxScannedRecords = 100000;

            using (var sr = new FastqItemBAMParser(bamFile))
            {
                FastqItem item;
                var       count = 0;
                while ((item = sr.ParseNext()) != null)
                {
                    if (item.Flags.HasFlag(SAMFlags.PairedRead))
                    {
                        return(true);
                    }

                    // Read names are identifiers, so compare ordinally rather than with the
                    // current culture. The null check keeps a record without a name from
                    // aborting detection with a NullReferenceException.
                    var qname = item.Qname;
                    if (qname != null &&
                        (qname.EndsWith("/1", System.StringComparison.Ordinal) ||
                         qname.EndsWith("/2", System.StringComparison.Ordinal)))
                    {
                        return(true);
                    }

                    count++;
                    if (count > maxScannedRecords)
                    {
                        break;
                    }
                }
            }
            return(false);
        }
Example #2
0
        /// <summary>
        /// Writes the reads of the input BAM file to a single FASTQ file, emitting a record only
        /// when its query name differs from the name of the previously written record.
        /// </summary>
        /// <remarks>
        /// Output goes to a ".tmp" file first and is moved over the final name once the whole
        /// input has been read.
        /// </remarks>
        /// <returns>An array holding the path of the FASTQ file that was produced.</returns>
        /// <exception cref="UserTerminatedException">
        /// Cancellation was pending at one of the 100000-record progress checkpoints.
        /// </exception>
        public override IEnumerable <string> Process()
        {
            Progress.SetMessage("This single end bam file has been sorted by name, it will cost less time/memory to generate fastq files ...");

            var output  = _options.OutputPrefix + ".fastq";
            var gzipped = !_options.UnGzipped;
            if (gzipped)
            {
                output += ".gz";
            }

            var tmp = output + ".tmp";

            using (var writer = StreamUtils.GetWriter(tmp, gzipped))
            using (var parser = new FastqItemBAMParser(_options.InputFile))
            {
                string previousName = null;
                var    readCount    = 0;
                var    writtenCount = 0;

                for (FastqItem read = parser.ParseNext(); read != null; read = parser.ParseNext())
                {
                    readCount++;

                    // Report progress and honour cancellation every 100000 records.
                    if (readCount % 100000 == 0)
                    {
                        Progress.SetMessage("{0} reads", readCount);
                        if (Progress.IsCancellationPending())
                        {
                            throw new UserTerminatedException();
                        }
                    }

                    // Same name as the record written last: nothing new to emit.
                    if (read.Qname.Equals(previousName))
                    {
                        continue;
                    }

                    read.WriteFastq(writer);
                    previousName = read.Qname;
                    writtenCount++;

                    if (writtenCount % 1000000 != 0)
                    {
                        continue;
                    }

                    // Every million written reads: force a collection and report memory use.
                    GC.Collect();
                    GC.WaitForPendingFinalizers();
                    Progress.SetMessage(string.Format("{0} single reads processed, cost memory: {1} MB", writtenCount, (GC.GetTotalMemory(true) / 1048576)));
                }
            }

            // Replace any previous result with the completed temporary file.
            if (File.Exists(output))
            {
                File.Delete(output);
            }
            File.Move(tmp, output);

            return(new[] { output });
        }
 /// <summary>
 /// Reports whether the BAM header declares the file as sorted by query name.
 /// </summary>
 /// <param name="bamFile">Path of the BAM file whose header is read.</param>
 /// <returns>
 /// <c>true</c> when the first "HD" header record has an "SO" tag whose value equals
 /// "queryname"; <c>false</c> when the record or the tag is missing or the value differs.
 /// </returns>
 public static bool IsSortedByName(string bamFile)
 {
     using (var parser = new FastqItemBAMParser(bamFile))
     {
         var headerRecord = parser.Header.RecordFields.FirstOrDefault(field => field.Typecode.Equals("HD"));
         if (headerRecord == null)
         {
             return(false);
         }

         var sortOrder = headerRecord.Tags.FirstOrDefault(tag => tag.Tag.Equals("SO"));
         return(sortOrder != null && sortOrder.Value.Equals("queryname"));
     }
 }
        /// <summary>
        /// Writes every record returned by the BAM parser to a single FASTQ file and registers
        /// each written query name in the parser's <c>IgnoreQuery</c> collection.
        /// </summary>
        /// <remarks>
        /// Output goes to a ".tmp" file first and is moved over the final name once the whole
        /// input has been read.
        /// </remarks>
        /// <returns>An array holding the path of the FASTQ file that was produced.</returns>
        /// <exception cref="UserTerminatedException">
        /// Cancellation was pending at one of the 100000-record progress checkpoints.
        /// </exception>
        public override IEnumerable <string> Process()
        {
            Progress.SetMessage("This single end bam file is not sorted by name, it will cost more time/memory to generate fastq files ...");

            var output  = _options.OutputPrefix + ".fastq";
            var gzipped = !_options.UnGzipped;
            if (gzipped)
            {
                output += ".gz";
            }

            var tmp = output + ".tmp";

            using (var writer = StreamUtils.GetWriter(tmp, gzipped))
            using (var parser = new FastqItemBAMParser(_options.InputFile))
            {
                var readCount = 0;

                for (FastqItem read = parser.ParseNext(); read != null; read = parser.ParseNext())
                {
                    readCount++;

                    // Report progress and honour cancellation every 100000 records.
                    var atCheckpoint = readCount % 100000 == 0;
                    if (atCheckpoint)
                    {
                        Progress.SetMessage("{0} reads", readCount);
                        if (Progress.IsCancellationPending())
                        {
                            throw new UserTerminatedException();
                        }
                    }

                    read.WriteFastq(writer);

                    // NOTE(review): presumably makes the parser skip later records carrying
                    // this name; confirm against FastqItemBAMParser.
                    parser.IgnoreQuery.Add(read.Qname);
                }
            }

            // Replace any previous result with the completed temporary file.
            if (File.Exists(output))
            {
                File.Delete(output);
            }
            File.Move(tmp, output);

            return(new[] { output });
        }
        /// <summary>
        /// Picks the BAM-to-FASTQ processor matching the input file: paired or single reads,
        /// sorted by name or not.
        /// </summary>
        /// <param name="options">Conversion options; <c>InputFile</c> is the BAM file that is probed.</param>
        /// <returns>The processor built for the detected combination.</returns>
        public override IProcessor GetProcessor(Bam2FastqProcessorOptions options)
        {
            // Both probes run exactly once, pairing first, whichever processor is chosen.
            var pairedInput = FastqItemBAMParser.IsPaired(options.InputFile);
            var nameSorted  = FastqItemBAMParser.IsSortedByName(options.InputFile);

            IProcessor processor;
            if (pairedInput && nameSorted)
            {
                processor = new Bam2PairedFastqNameSortedProcessor(options);
            }
            else if (pairedInput)
            {
                processor = new Bam2PairedFastqProcessor(options);
            }
            else if (nameSorted)
            {
                processor = new Bam2SingleFastqNameSortedProcessor(options);
            }
            else
            {
                processor = new Bam2SingleFastqProcessor(options);
            }

            return(processor);
        }
Example #6
0
        /// <summary>
        /// Converts a name-sorted paired BAM file into ".1.fastq" and ".2.fastq" files plus an
        /// ".orphan.fastq" file.
        /// </summary>
        /// <remarks>
        /// Consecutive records sharing a <c>PairName</c> are collected by <c>PairIndex</c>; when
        /// the name changes, the collected group is passed to <c>WriteFastq</c> together with
        /// the three writers. The two paired outputs are written to ".tmp" files and moved into
        /// place at the end. The orphan file is deleted again when it ends up empty.
        /// </remarks>
        /// <returns>The paths of the ".1" and ".2" FASTQ files; the orphan file is not listed.</returns>
        /// <exception cref="InvalidDataException">
        /// A record has a null or empty query name, either as read or after <c>CheckPairedName</c>.
        /// </exception>
        /// <exception cref="UserTerminatedException">
        /// Cancellation was pending at one of the 100000-record progress checkpoints.
        /// </exception>
        public override IEnumerable <string> Process()
        {
            Progress.SetMessage("This pair end bam file has been sorted by name, it will cost less time to generate fastq files ...");

            var output1 = _options.OutputPrefix + ".1.fastq";
            var output2 = _options.OutputPrefix + ".2.fastq";

            if (!_options.UnGzipped)
            {
                output1 = output1 + ".gz";
                output2 = output2 + ".gz";
            }
            var tmp1 = output1 + ".tmp";
            var tmp2 = output2 + ".tmp";

            // Append the suffix like the paired outputs do. Path.ChangeExtension would replace
            // whatever follows the last '.' of the prefix (e.g. "sample.v2" would become
            // "sample.orphan.fastq"), so runs with different dotted prefixes could collide.
            var output3 = _options.OutputPrefix + ".orphan.fastq";

            var    paired   = new FastqItem[3];
            string lastname = null;

            using (var sw1 = StreamUtils.GetWriter(tmp1, !_options.UnGzipped))
            using (var sw2 = StreamUtils.GetWriter(tmp2, !_options.UnGzipped))
            using (var sw3 = new StreamWriter(output3))
            {
                // Slot 0 is unused; slots 1-3 hold the writers for output1, output2 and output3.
                var sw = new[] { null, sw1, sw2, sw3 };

                using (var sr = new FastqItemBAMParser(_options.InputFile))
                {
                    FastqItem ss;
                    var       count       = 0;
                    var       outputCount = 0;
                    while ((ss = sr.ParseNext()) != null)
                    {
                        if (string.IsNullOrEmpty(ss.Qname))
                        {
                            throw new InvalidDataException(string.Format("Entry after {0} has empty name", lastname));
                        }

                        ss.CheckPairedName();

                        // Checked a second time because CheckPairedName ran in between.
                        if (string.IsNullOrEmpty(ss.Qname))
                        {
                            throw new InvalidDataException(string.Format("After check paired name, entry after {0} has empty name", lastname));
                        }

                        count++;

                        if (count % 100000 == 0)
                        {
                            Progress.SetMessage("{0} reads processed.", count);
                            if (Progress.IsCancellationPending())
                            {
                                throw new UserTerminatedException();
                            }
                        }

                        // First record of the file: start the first group.
                        if (lastname == null)
                        {
                            paired[ss.PairIndex] = ss;
                            lastname             = ss.PairName;
                            continue;
                        }

                        // Still inside the current group.
                        if (ss.PairName.Equals(lastname))
                        {
                            paired[ss.PairIndex] = ss;
                            continue;
                        }

                        // Name changed: write the finished group, then start the next one.
                        outputCount = WriteFastq(paired, sw, outputCount);

                        paired[1]            = null;
                        paired[2]            = null;
                        paired[ss.PairIndex] = ss;
                        lastname             = ss.PairName;
                    }

                    // Write the group that was still pending when the input ended.
                    WriteFastq(paired, sw, outputCount);
                }
            }

            if (File.Exists(output1))
            {
                File.Delete(output1);
            }
            File.Move(tmp1, output1);

            if (File.Exists(output2))
            {
                File.Delete(output2);
            }
            File.Move(tmp2, output2);

            // Nothing was written to the orphan file: remove it.
            if (new FileInfo(output3).Length == 0)
            {
                File.Delete(output3);
            }

            return(new[] { output1, output2 });
        }
Example #7
0
        /// <summary>
        /// Converts a paired BAM file that is not sorted by name into ".1.fastq" and ".2.fastq"
        /// files, and writes reads whose mate was never seen to an ".orphan.fastq" file.
        /// </summary>
        /// <remarks>
        /// Each read is kept in a dictionary keyed by <c>PairName</c> until a read with the same
        /// pair name and a different <c>PairIndex</c> arrives; both are then written to the
        /// writer selected by their <c>PairIndex</c>. The paired outputs are written to ".tmp"
        /// files and moved into place at the end. The orphan file is only created when unmatched
        /// reads remain.
        /// </remarks>
        /// <returns>The paths of the ".1" and ".2" FASTQ files; the orphan file is not listed.</returns>
        /// <exception cref="UserTerminatedException">
        /// Cancellation was pending at one of the 100000-record progress checkpoints.
        /// </exception>
        public override IEnumerable <string> Process()
        {
            Progress.SetMessage("This pair end bam file is not sorted by name, it will cost more time/memory to generate fastq files ...");
            var map     = new Dictionary <string, FastqItem>();
            var output1 = _options.OutputPrefix + ".1.fastq";
            var output2 = _options.OutputPrefix + ".2.fastq";

            if (!_options.UnGzipped)
            {
                output1 = output1 + ".gz";
                output2 = output2 + ".gz";
            }
            var tmp1 = output1 + ".tmp";
            var tmp2 = output2 + ".tmp";

            using (var sw1 = StreamUtils.GetWriter(tmp1, !_options.UnGzipped))
            using (var sw2 = StreamUtils.GetWriter(tmp2, !_options.UnGzipped))
            {
                // Slot 0 is unused so that the array can be indexed directly by PairIndex.
                var sw = new[] { null, sw1, sw2 };

                using (var sr = new FastqItemBAMParser(_options.InputFile))
                {
                    FastqItem ss;
                    var       count       = 0;
                    var       outputCount = 0;
                    while ((ss = sr.ParseNext()) != null)
                    {
                        ss.CheckPairedName();

                        count++;

                        if (count % 100000 == 0)
                        {
                            Progress.SetMessage("{0} reads processed, {1} unpaired.", count, map.Count);
                            if (Progress.IsCancellationPending())
                            {
                                throw new UserTerminatedException();
                            }
                        }

                        FastqItem paired;
                        if (!map.TryGetValue(ss.PairName, out paired))
                        {
                            // First read seen for this pair name: park it until its mate arrives.
                            map[ss.PairName] = ss;
                            continue;
                        }

                        // Same pair index as the parked read: skip this record.
                        if (paired.PairIndex == ss.PairIndex)
                        {
                            continue;
                        }

                        ss.WriteFastq(sw[ss.PairIndex]);
                        paired.WriteFastq(sw[paired.PairIndex]);

                        // NOTE(review): presumably makes the parser skip later records carrying
                        // these names; confirm against FastqItemBAMParser.
                        sr.IgnoreQuery.Add(ss.Qname);
                        sr.IgnoreQuery.Add(paired.Qname);
                        map.Remove(ss.PairName);
                        outputCount++;

                        if (outputCount % 100000 == 0)
                        {
                            // Rebuild the dictionary from its remaining entries before forcing
                            // a collection (presumably to release storage left behind by the
                            // removed entries), then report memory use.
                            var temp = new Dictionary <string, FastqItem>(map);
                            map.Clear();
                            map = temp;
                            GC.Collect();
                            GC.WaitForPendingFinalizers();
                            Progress.SetMessage("Cost memory: " + (GC.GetTotalMemory(true) / 1048576) + " MB");
                        }
                    }
                }
            }

            if (File.Exists(output1))
            {
                File.Delete(output1);
            }
            File.Move(tmp1, output1);

            if (File.Exists(output2))
            {
                File.Delete(output2);
            }
            File.Move(tmp2, output2);

            if (map.Count > 0)
            {
                // Append the suffix like the paired outputs do. Path.ChangeExtension would
                // replace whatever follows the last '.' of the prefix (e.g. "sample.v2" would
                // become "sample.orphan.fastq"), so runs with different dotted prefixes could
                // collide.
                var output3 = _options.OutputPrefix + ".orphan.fastq";
                using (var sw3 = new StreamWriter(output3))
                {
                    foreach (var v in map.Values)
                    {
                        v.WriteFastq(sw3);
                    }
                }
            }

            return(new[] { output1, output2 });
        }