/// <summary>
        /// Prepares the output directory, checks that a reader can be created for both
        /// the normal and the tumor file, and reports (or, in thread mode, resolves)
        /// the chromosome names to process.
        /// </summary>
        /// <returns>
        /// <c>false</c> when the output directory cannot be prepared or when creating a
        /// reader throws (the exception message is added to <c>ParsingErrors</c>);
        /// otherwise <c>true</c>.
        /// </returns>
        public override bool PrepareOptions()
        {
            if (!PrepareOutputDirectory())
            {
                return false;
            }

            try
            {
                // Readers are created and disposed immediately; only a failure to create one matters here.
                using (SAMFactory.GetReader(this.NormalFile))
                {
                }

                using (SAMFactory.GetReader(this.TumorFile))
                {
                }
            }
            catch (Exception ex)
            {
                ParsingErrors.Add(ex.Message);
                return false;
            }

            bool threadMode = this.ThreadCount >= 2;
            if (!threadMode)
            {
                // Not in thread mode: just echo the chromosome names when some were supplied.
                bool namesSupplied = this.ChromosomeNames != null && this.ChromosomeNames.Count > 0;
                if (namesSupplied)
                {
                    Console.Out.WriteLine("#mpileup chromosome names: " + this.ChromosomeNames.Merge(","));
                }

                return true;
            }

            Console.WriteLine("Checking chromosome names for thread mode ...");

            // Thread mode needs an explicit list; fall back to the names found in the normal file.
            bool namesMissing = this.ChromosomeNames == null || this.ChromosomeNames.Count == 0;
            if (namesMissing)
            {
                this.ChromosomeNames = SAMUtils.GetChromosomes(this.NormalFile);
            }

            foreach (var chromosome in this.ChromosomeNames)
            {
                Console.WriteLine(chromosome);
            }

            return true;
        }
コード例 #2
0
        /// <summary>
        /// Collapses reads that share a query name into a single item carrying all of
        /// their locations, then calls <c>KeepUniqueLocation</c> on the merged list.
        /// </summary>
        /// <param name="samlist">
        /// Reads to merge. When merging takes place the list is sorted by name and then
        /// cleared, so the caller's list ends up empty.
        /// </param>
        /// <returns>
        /// <paramref name="samlist"/> itself when the engine type is 4, the SAM format
        /// has alternative hits, or the list is empty; otherwise a new merged list.
        /// </returns>
        protected virtual List<T> DoAddCompleted<T>(List<T> samlist) where T : SAMAlignedItem, new()
        {
            bool nothingToMerge = _options.EngineType == 4
                                  || _options.GetSAMFormat().HasAlternativeHits
                                  || samlist.Count == 0;
            if (nothingToMerge)
            {
                return samlist;
            }

            Progress.SetMessage("Sorting mapped reads by name...");
            SAMUtils.SortByName(samlist);
            Progress.SetMessage("Merge reads from same query...");

            var merged = new List<T>();

            // After sorting, reads with the same name are adjacent; "current" is the
            // item that absorbs the locations of the duplicates that follow it.
            T current = samlist[0];
            merged.Add(current);

            for (int index = 1; index < samlist.Count; index++)
            {
                T candidate = samlist[index];

                if (current.Qname.Equals(candidate.Qname))
                {
                    current.AddLocations(candidate.Locations);
                    candidate.ClearLocations();
                    continue;
                }

                current = candidate;
                merged.Add(current);
            }

            // Empty the input list; its content now lives in the merged list.
            samlist.Clear();

            KeepUniqueLocation<T>(merged);

            Progress.SetMessage("Total {0} read(s) mapped.", merged.Count);

            return merged;
        }
コード例 #3
0
        /// <summary>
        /// Reads alignments from <paramref name="sourceFile"/> and writes each accepted
        /// read to <paramref name="targetFile"/> in FASTQ format, at most once per query name.
        /// </summary>
        /// <param name="sourceFile">Alignment file opened through <c>SAMFactory.GetReader</c>.</param>
        /// <param name="targetFile">
        /// Output file; it is written compressed when the name ends with ".gz"
        /// (case-insensitive).
        /// </param>
        /// <param name="exceptQueryNames">Query names that must not be written.</param>
        /// <param name="countFile">
        /// Optional count file. When it exists, a "<paramref name="targetFile"/>.dupcount"
        /// file is written with one "name&lt;TAB&gt;count" line per extracted read.
        /// </param>
        /// <returns>The number of reads written to <paramref name="targetFile"/>.</returns>
        /// <exception cref="UserTerminatedException">
        /// Thrown when cancellation is pending; checked every 100000 input lines.
        /// </exception>
        public int Extract(string sourceFile, string targetFile, IEnumerable<string> exceptQueryNames, string countFile)
        {
            int result = 0;

            // Names to skip. Every written name is added too, so each query is written once.
            var except = new HashSet<string>(exceptQueryNames);

            SmallRNACountMap cm      = new SmallRNACountMap();
            StreamWriter     swCount = null;

            if (File.Exists(countFile))
            {
                // Re-key the existing counts by the query name with the NTA tag suffix removed,
                // matching the names used in the loop below.
                var oldCm = new SmallRNACountMap(countFile);
                foreach (var c in oldCm.Counts)
                {
                    cm.Counts[c.Key.StringBefore(SmallRNAConsts.NTA_TAG)] = c.Value;
                }
                swCount = new StreamWriter(targetFile + ".dupcount");
            }

            try
            {
                // Ordinal, case-insensitive suffix test: avoids the culture-sensitive
                // ToLower()/EndsWith pair and the temporary lowered string.
                using (var sw = StreamUtils.GetWriter(targetFile, targetFile.EndsWith(".gz", System.StringComparison.OrdinalIgnoreCase)))
                {
                    using (var sr = SAMFactory.GetReader(sourceFile, true))
                    {
                        string line;
                        var    count = 0;
                        while ((line = sr.ReadLine()) != null)
                        {
                            count++;

                            if (count % 100000 == 0)
                            {
                                Progress.SetMessage("{0} reads", count);
                                if (Progress.IsCancellationPending())
                                {
                                    throw new UserTerminatedException();
                                }
                            }

                            var ss = SAMUtils.Parse<SAMItemSlim>(line);
                            ss.Qname = ss.Qname.StringBefore(SmallRNAConsts.NTA_TAG);
                            if (except.Contains(ss.Qname))
                            {
                                continue;
                            }

                            if (Filter != null && !Filter.Accept(ss))
                            {
                                continue;
                            }

                            except.Add(ss.Qname);
                            ss.WriteFastq(sw);

                            if (swCount != null)
                            {
                                // NOTE(review): indexer lookup - presumably every extracted name is
                                // present in the count file; confirm, since a missing key may throw.
                                swCount.WriteLine("{0}\t{1}", ss.Qname, cm.Counts[ss.Qname]);
                            }

                            result++;
                        }
                    }
                }
            }
            finally
            {
                // The count writer is created before the try block, so it is closed here on every path.
                if (swCount != null)
                {
                    swCount.Close();
                }
            }

            return result;
        }
コード例 #4
0
        /// <summary>
        /// Reads the alignment file line by line, records a <c>QueryInfo</c> for every
        /// line, and builds an aligned item for each line that passes all filters.
        /// </summary>
        /// <param name="fileName">Alignment file opened through <c>SAMFactory.GetReader</c>.</param>
        /// <param name="totalQueries">
        /// Receives one <c>QueryInfo</c> per input line, including lines that are later
        /// rejected; mismatch count and length are filled in only once the flag filter passes.
        /// </param>
        /// <returns>The items built from the lines that passed every filter.</returns>
        /// <exception cref="UserTerminatedException">
        /// Thrown when cancellation is pending; checked every 1000 input lines.
        /// </exception>
        protected override List<T> DoBuild<T>(string fileName, out List<QueryInfo> totalQueries)
        {
            var accepted = new List<T>();

            _format = _options.GetSAMFormat();

            totalQueries = new List<QueryInfo>();

            using (var reader = SAMFactory.GetReader(fileName, true))
            {
                int    lineCount     = 0;
                int    acceptedCount = 0;
                string line;
                while ((line = reader.ReadLine()) != null)
                {
                    lineCount++;

                    if (lineCount % 1000 == 0 && Progress.IsCancellationPending())
                    {
                        throw new UserTerminatedException();
                    }

                    var fields = line.Split('\t');

                    // Every line is registered as a query, whether or not it is kept below.
                    var queryName = fields[SAMFormatConst.QNAME_INDEX];
                    var queryInfo = new QueryInfo(queryName);
                    totalQueries.Add(queryInfo);

                    var flags = (SAMFlags)int.Parse(fields[SAMFormatConst.FLAG_INDEX]);
                    if (!_filter.AcceptFlags(flags))
                    {
                        continue;
                    }

                    var mismatches = _format.GetNumberOfMismatch(fields);
                    var sequence   = fields[SAMFormatConst.SEQ_INDEX];

                    queryInfo.Mismatch = mismatches;
                    queryInfo.Length   = sequence.Length;

                    // Read-level filters, evaluated in order: mismatch count, query name, length.
                    bool passesReadFilters = _filter.AcceptMismatch(mismatches)
                                             && _filter.AcceptQueryName(queryName)
                                             && _filter.AcceptLength(sequence.Length);
                    if (!passesReadFilters)
                    {
                        continue;
                    }

                    var cigar = fields[SAMFormatConst.CIGAR_INDEX];
                    if (!_filter.AcceptCigar(cigar))
                    {
                        continue;
                    }

                    var seqname = fields[SAMFormatConst.RNAME_INDEX].StringAfter("chr");
                    var start   = int.Parse(fields[SAMFormatConst.POS_INDEX]);
                    var end     = SAMUtils.ParseEnd(start, cigar);

                    bool onReverseStrand = flags.HasFlag(SAMFlags.QueryOnReverseStrand);
                    char strand          = onReverseStrand ? '-' : '+';

                    var item     = new T();
                    var location = new SAMAlignedLocation(item);
                    location.Seqname = seqname;
                    location.Start   = start;
                    location.End     = end;
                    location.Strand  = strand;

                    if (!_filter.AcceptLocus(location))
                    {
                        continue;
                    }

                    // Reads on the reverse strand are stored reverse-complemented.
                    var storedSequence = onReverseStrand
                        ? SequenceUtils.GetReverseComplementedSequence(sequence)
                        : sequence;

                    item.Qname    = queryName;
                    item.Sequence = storedSequence;

                    location.AlignmentScore    = _format.GetAlignmentScore(fields);
                    location.Cigar             = cigar;
                    location.NumberOfMismatch  = mismatches;
                    location.MismatchPositions = _format.GetMismatchPositions(fields);

                    if (_format.HasAlternativeHits)
                    {
                        _format.ParseAlternativeHits(fields, item);
                    }

                    accepted.Add(item);

                    acceptedCount++;

                    if (acceptedCount % 100 == 0)
                    {
                        Progress.SetMessage("{0} feature reads from {1} reads", acceptedCount, lineCount);
                    }
                }
            }

            return accepted;
        }
コード例 #5
0
 /// <summary>
 /// Parses one text line into a <c>SAMItemSlim</c> by delegating to <c>SAMUtils.Parse</c>.
 /// </summary>
 /// <param name="line">The line to parse.</param>
 /// <returns>The item produced by <c>SAMUtils.Parse</c>.</returns>
 private static SAMItemSlim LineToSamItem(string line)
 {
     SAMItemSlim item = SAMUtils.Parse<SAMItemSlim>(line);
     return item;
 }