/// <summary>
/// Validates the options before running: prepares the output directory, checks that
/// both the normal and the tumor input files can be opened, and reports the
/// chromosome names that will be used.
/// </summary>
/// <returns>
/// <c>true</c> when all checks pass; <c>false</c> when the output directory could not be
/// prepared or when opening either input file threw (the exception message is then
/// appended to <c>ParsingErrors</c>).
/// </returns>
public override bool PrepareOptions()
{
    if (!PrepareOutputDirectory())
    {
        return false;
    }

    // Open and immediately dispose a reader on each input; only the ability to
    // open the file is being checked here, nothing is read.
    try
    {
        using (SAMFactory.GetReader(this.NormalFile))
        {
        }

        using (SAMFactory.GetReader(this.TumorFile))
        {
        }
    }
    catch (Exception ex)
    {
        ParsingErrors.Add(ex.Message);
        return false;
    }

    bool threadMode = this.ThreadCount >= 2;
    if (!threadMode)
    {
        // Single-thread mode: only echo the chromosome names when some were supplied.
        bool namesSupplied = this.ChromosomeNames != null && this.ChromosomeNames.Count > 0;
        if (namesSupplied)
        {
            Console.Out.WriteLine("#mpileup chromosome names: " + this.ChromosomeNames.Merge(","));
        }

        return true;
    }

    Console.WriteLine("Checking chromosome names for thread mode ...");

    // Thread mode needs an explicit chromosome list; fall back to the ones
    // reported by SAMUtils for the normal file when none were supplied.
    bool namesMissing = this.ChromosomeNames == null || this.ChromosomeNames.Count == 0;
    if (namesMissing)
    {
        this.ChromosomeNames = SAMUtils.GetChromosomes(this.NormalFile);
    }

    foreach (var chromosome in this.ChromosomeNames)
    {
        Console.WriteLine(chromosome);
    }

    return true;
}
/// <summary>
/// Collapses entries that share the same query name into a single item by moving
/// the locations of later duplicates onto the first item seen for that name.
/// </summary>
/// <typeparam name="T">Aligned item type.</typeparam>
/// <param name="samlist">
/// Items to merge. Unless the method returns early, this list is sorted by name
/// in place and then cleared.
/// </param>
/// <returns>
/// <paramref name="samlist"/> itself when the engine type is 4, when the SAM format
/// reports alternative hits, or when the list is empty; otherwise a new list holding
/// one item per distinct query name.
/// </returns>
protected virtual List<T> DoAddCompleted<T>(List<T> samlist) where T : SAMAlignedItem, new()
{
    if (_options.EngineType == 4 || _options.GetSAMFormat().HasAlternativeHits || samlist.Count == 0)
    {
        return samlist;
    }

    Progress.SetMessage("Sorting mapped reads by name...");
    SAMUtils.SortByName(samlist);

    Progress.SetMessage("Merge reads from same query...");

    // After sorting, items with the same query name are adjacent, so a single
    // pass comparing each item with the current group head is sufficient.
    T groupHead = samlist[0];
    var merged = new List<T>();
    merged.Add(groupHead);

    for (int index = 1; index < samlist.Count; index++)
    {
        T candidate = samlist[index];
        if (groupHead.Qname.Equals(candidate.Qname))
        {
            // Same query: transfer its locations to the group head and empty the duplicate.
            groupHead.AddLocations(candidate.Locations);
            candidate.ClearLocations();
        }
        else
        {
            groupHead = candidate;
            merged.Add(groupHead);
        }
    }

    // Empty the input list (visible to the caller) and drop the local reference.
    samlist.Clear();
    samlist = null;

    KeepUniqueLocation<T>(merged);

    Progress.SetMessage("Total {0} read(s) mapped.", merged.Count);
    return merged;
}
/// <summary>
/// Reads alignments from <paramref name="sourceFile"/> and writes each accepted read
/// to <paramref name="targetFile"/> in FASTQ form, at most once per query name.
/// </summary>
/// <param name="sourceFile">Alignment file opened through <c>SAMFactory.GetReader</c>.</param>
/// <param name="targetFile">
/// Output file. It is written compressed when the name ends with ".gz"
/// (checked case-insensitively).
/// </param>
/// <param name="exceptQueryNames">Query names that must not be written.</param>
/// <param name="countFile">
/// Optional count file. When it exists, its counts are loaded (keyed by the name
/// before the NTA tag) and a "&lt;targetFile&gt;.dupcount" file is written with one
/// "name&lt;TAB&gt;count" line per extracted read.
/// </param>
/// <returns>The number of reads written to <paramref name="targetFile"/>.</returns>
/// <exception cref="UserTerminatedException">
/// Thrown when cancellation is pending at one of the periodic progress checks.
/// </exception>
public int Extract(string sourceFile, string targetFile, IEnumerable<string> exceptQueryNames, string countFile)
{
    int result = 0;

    // Names already excluded or already written; extended as reads are emitted.
    var except = new HashSet<string>(exceptQueryNames);

    SmallRNACountMap cm = new SmallRNACountMap();
    StreamWriter swCount = null;
    if (File.Exists(countFile))
    {
        var oldCm = new SmallRNACountMap(countFile);
        foreach (var c in oldCm.Counts)
        {
            cm.Counts[c.Key.StringBefore(SmallRNAConsts.NTA_TAG)] = c.Value;
        }

        swCount = new StreamWriter(targetFile + ".dupcount");
    }

    try
    {
        // Ordinal, case-insensitive extension check: a file extension is an
        // identifier, not linguistic text, so the decision must not depend on
        // the current culture (ToLower() + culture-sensitive EndsWith did).
        bool gzipped = targetFile.EndsWith(".gz", System.StringComparison.OrdinalIgnoreCase);

        using (var sw = StreamUtils.GetWriter(targetFile, gzipped))
        using (var sr = SAMFactory.GetReader(sourceFile, true))
        {
            string line;
            var count = 0;
            while ((line = sr.ReadLine()) != null)
            {
                count++;
                if (count % 100000 == 0)
                {
                    Progress.SetMessage("{0} reads", count);
                    if (Progress.IsCancellationPending())
                    {
                        throw new UserTerminatedException();
                    }
                }

                var ss = SAMUtils.Parse<SAMItemSlim>(line);
                ss.Qname = ss.Qname.StringBefore(SmallRNAConsts.NTA_TAG);

                if (except.Contains(ss.Qname))
                {
                    continue;
                }

                if (Filter != null && !Filter.Accept(ss))
                {
                    continue;
                }

                // Remember the name so later alignments of the same query are skipped.
                except.Add(ss.Qname);
                ss.WriteFastq(sw);

                if (swCount != null)
                {
                    swCount.WriteLine("{0}\t{1}", ss.Qname, cm.Counts[ss.Qname]);
                }

                result++;
            }
        }
    }
    finally
    {
        if (swCount != null)
        {
            swCount.Close();
        }
    }

    return result;
}
/// <summary>
/// Reads <paramref name="fileName"/> line by line, records every read in
/// <paramref name="totalQueries"/>, and builds an aligned item for each line that
/// passes all configured filters.
/// </summary>
/// <typeparam name="T">Aligned item type created for each accepted line.</typeparam>
/// <param name="fileName">Alignment file opened through <c>SAMFactory.GetReader</c>.</param>
/// <param name="totalQueries">
/// Receives one <c>QueryInfo</c> per line read, including lines that are later
/// filtered out. Mismatch count and length are only filled in for lines that pass
/// the flag filter.
/// </param>
/// <returns>The items built from the accepted lines, in file order.</returns>
/// <exception cref="UserTerminatedException">
/// Thrown when cancellation is pending at one of the periodic checks (every 1000 lines).
/// </exception>
protected override List<T> DoBuild<T>(string fileName, out List<QueryInfo> totalQueries)
{
    var accepted = new List<T>();

    _format = _options.GetSAMFormat();

    totalQueries = new List<QueryInfo>();

    using (var reader = SAMFactory.GetReader(fileName, true))
    {
        int lineCount = 0;
        int acceptedCount = 0;
        string line;
        while ((line = reader.ReadLine()) != null)
        {
            lineCount++;
            if (lineCount % 1000 == 0 && Progress.IsCancellationPending())
            {
                throw new UserTerminatedException();
            }

            var fields = line.Split('\t');

            var queryName = fields[SAMFormatConst.QNAME_INDEX];
            var query = new QueryInfo(queryName);
            totalQueries.Add(query);

            SAMFlags flags = (SAMFlags)int.Parse(fields[SAMFormatConst.FLAG_INDEX]);
            if (!_filter.AcceptFlags(flags))
            {
                continue;
            }

            var mismatches = _format.GetNumberOfMismatch(fields);
            var sequence = fields[SAMFormatConst.SEQ_INDEX];

            query.Mismatch = mismatches;
            query.Length = sequence.Length;

            // Reject on too many mismatches, then on query name, then on length;
            // short-circuiting keeps the filters evaluated in that order.
            bool passesReadFilters = _filter.AcceptMismatch(mismatches)
                                     && _filter.AcceptQueryName(queryName)
                                     && _filter.AcceptLength(sequence.Length);
            if (!passesReadFilters)
            {
                continue;
            }

            var cigar = fields[SAMFormatConst.CIGAR_INDEX];
            if (!_filter.AcceptCigar(cigar))
            {
                continue;
            }

            var seqname = fields[SAMFormatConst.RNAME_INDEX].StringAfter("chr");
            var start = int.Parse(fields[SAMFormatConst.POS_INDEX]);
            var end = SAMUtils.ParseEnd(start, cigar);

            bool onReverseStrand = flags.HasFlag(SAMFlags.QueryOnReverseStrand);
            char strand = onReverseStrand ? '-' : '+';

            var item = new T();
            var location = new SAMAlignedLocation(item);
            location.Seqname = seqname;
            location.Start = start;
            location.End = end;
            location.Strand = strand;

            if (!_filter.AcceptLocus(location))
            {
                continue;
            }

            // Reverse-strand reads are stored as the reverse complement of the SEQ field.
            if (onReverseStrand)
            {
                sequence = SequenceUtils.GetReverseComplementedSequence(sequence);
            }

            item.Qname = queryName;
            item.Sequence = sequence;

            location.AlignmentScore = _format.GetAlignmentScore(fields);
            location.Cigar = cigar;
            location.NumberOfMismatch = mismatches;
            location.MismatchPositions = _format.GetMismatchPositions(fields);

            if (_format.HasAlternativeHits)
            {
                _format.ParseAlternativeHits(fields, item);
            }

            accepted.Add(item);

            acceptedCount++;
            if (acceptedCount % 100 == 0)
            {
                Progress.SetMessage("{0} feature reads from {1} reads", acceptedCount, lineCount);
            }
        }
    }

    return accepted;
}
/// <summary>
/// Parses one text line into a <c>SAMItemSlim</c> by delegating to <c>SAMUtils.Parse</c>.
/// </summary>
/// <param name="line">The line to parse.</param>
/// <returns>The item produced by <c>SAMUtils.Parse</c>.</returns>
private static SAMItemSlim LineToSamItem(string line)
{
    SAMItemSlim parsed = SAMUtils.Parse<SAMItemSlim>(line);
    return parsed;
}