/// <summary>
/// Collects the names of perfectly mapped reads from a text file or a bam/sam file.
/// </summary>
/// <param name="readNameFile">
/// Input path. A ".bam" or ".sam" extension (compared after ToLower) selects alignment parsing;
/// any other extension is read as a text file with one entry per line.
/// </param>
/// <returns>The distinct read names found.</returns>
private HashSet<string> ReadPerfectMappedReadNames(string readNameFile)
{
  HashSet<string> names;
  var extension = Path.GetExtension(readNameFile).ToLower();
  var isAlignmentFile = extension.Equals(".bam") || extension.Equals(".sam");

  if (isAlignmentFile)
  {
    Progress.SetMessage("Reading perfect mapped reads from bam/sam file {0} ...", readNameFile);
    names = new HashSet<string>();
    using (var reader = SAMFactory.GetReader(readNameFile, true))
    {
      for (string record = reader.ReadLine(); record != null; record = reader.ReadLine())
      {
        // Only records containing the literal "NM:i:0" text are kept; the name is the text before the first tab.
        if (record.Contains("NM:i:0"))
        {
          names.Add(record.StringBefore("\t"));
        }
      }
    }
  }
  else
  {
    Progress.SetMessage("Reading perfect mapped reads from text file {0} ...", readNameFile);
    names = new HashSet<string>(File.ReadAllLines(readNameFile));
  }

  Progress.SetMessage("{0} perfect mapped reads.", names.Count);
  return names;
}
/// <summary>
/// Creates a builder bound to the given options and opens a reader on <paramref name="fileName"/>.
/// </summary>
/// <param name="options">Builder options; also supplies the SAM format via GetSAMFormat().</param>
/// <param name="fileName">Alignment file handed to SAMFactory.GetReader.</param>
public AlignedPositionMapBuilder(AlignedPositionMapBuilderOptions options, string fileName)
{
  _options = options;

  // The format is resolved before the reader is opened, matching the original statement order.
  _format = options.GetSAMFormat();
  _file = SAMFactory.GetReader(fileName, true);

  _list = new AlignedPositionMapList();
  _done = new List<AlignedPositionMap>();
}
/// <summary>
/// Copies the headers of the bam file plus every alignment line whose "sequence:reference:position"
/// key occurs among the aligned locations read from the count file, into the output file.
/// </summary>
/// <returns>A single-element array holding the output file name.</returns>
/// <exception cref="UserTerminatedException">Thrown when cancellation is pending (checked every 1000 lines).</exception>
public override IEnumerable<string> Process()
{
  var xmlFormat = new MappedItemGroupXmlFileFormat();

  Progress.SetMessage("reading mapped reads from " + _options.CountFile + " ...");
  var mappedGroups = xmlFormat.ReadFromFile(_options.CountFile);

  // One key per aligned location: sequence, reference name and start position.
  var wantedKeys = new HashSet<string>(
    from grp in mappedGroups
    from mappedItem in grp
    from region in mappedItem.MappedRegions
    from location in region.AlignedLocations
    select string.Format("{0}:{1}:{2}", location.Parent.Sequence, location.Seqname, location.Start));
  Progress.SetMessage("There are {0} unique sequence:locus", wantedKeys.Count);

  using (var writer = new StreamWriter(_options.OutputFile))
  using (var reader = SAMFactory.GetReader(_options.BamFile, false))
  {
    foreach (var header in reader.ReadHeaders())
    {
      writer.WriteLine(header);
    }

    int total = 0;
    int kept = 0;
    string record;
    while ((record = reader.ReadLine()) != null)
    {
      if (total % 1000 == 0 && Progress.IsCancellationPending())
      {
        throw new UserTerminatedException();
      }

      if (total > 0 && total % 100000 == 0)
      {
        Progress.SetMessage("{0} candidates from {1} reads", kept, total);
      }

      total++;

      var fields = record.Split('\t');
      var key = string.Format("{0}:{1}:{2}",
        fields[SAMFormatConst.SEQ_INDEX],
        fields[SAMFormatConst.RNAME_INDEX],
        fields[SAMFormatConst.POS_INDEX]);

      if (wantedKeys.Contains(key))
      {
        writer.WriteLine(record);
        kept++;
      }
    }
  }

  return new[] { _options.OutputFile };
}
/// <summary>
/// Fills <c>Chromosomes</c> from the "@SQ" header lines of the given alignment file.
/// </summary>
/// <param name="normalFile">Alignment file whose headers are read.</param>
private void GetChromosomes(string normalFile)
{
  using (var reader = SAMFactory.GetReader(normalFile, false))
  {
    // For each @SQ line, take the text after "SN:" up to the next tab.
    this.Chromosomes = reader.ReadHeaders()
      .Where(header => header.StartsWith("@SQ"))
      .Select(header => header.StringAfter("SN:").StringBefore("\t"))
      .ToList();
  }
}
/// <summary>
/// Resolves the SAM format registered for the current <c>EngineType</c>.
/// </summary>
/// <returns>The format returned by SAMFactory.GetFormat; never null.</returns>
/// <exception cref="Exception">Thrown when SAMFactory.GetFormat returns null for the engine type.</exception>
public virtual ISAMFormat GetSAMFormat()
{
  var format = SAMFactory.GetFormat(this.EngineType);
  if (format != null)
  {
    return format;
  }

  var message = string.Format("No SAM format defined for engine {0}", this.EngineType);
  throw new Exception(message);
}
/// <summary>
/// Converts a single-end alignment file to FASTQ, writing each query name at most once.
/// </summary>
/// <returns>A single-element array holding the output file name.</returns>
/// <exception cref="UserTerminatedException">Thrown when cancellation is pending (checked every 100000 lines).</exception>
private IEnumerable<string> DoSingleEndProcess()
{
  using (var sw = new StreamWriter(options.OutputFile))
  using (var sr = SAMFactory.GetReader(options.InputFile, options.Samtools, true))
  {
    // Names already written to the FASTQ output; later records with the same name are skipped.
    var written = new HashSet<string>();
    var count = 0;
    string line;
    while ((line = sr.ReadLine()) != null)
    {
      count++;
      if (count % 100000 == 0)
      {
        Progress.SetMessage("{0} reads", count);
        if (Progress.IsCancellationPending())
        {
          throw new UserTerminatedException();
        }
      }

      try
      {
        var ss = LineToSamItem(line);

        // The per-read Console.WriteLine of the query name was leftover debug output and has been removed.
        if (written.Contains(ss.Qname))
        {
          continue;
        }

        ss.WriteFastq(sw);
        written.Add(ss.Qname);
      }
      catch (Exception ex)
      {
        // Log the whole exception (type, message and stack trace), not only the stack trace.
        Console.Error.WriteLine("Error of line {0} : {1}", line, ex);
        throw;
      }
    }
  }

  return new[] { options.OutputFile };
}
/// <summary>
/// Validates the options: prepares the output directory, checks that both alignment files can be
/// opened, and reports or resolves the chromosome names.
/// </summary>
/// <returns>
/// False when the output directory cannot be prepared or a reader cannot be opened (the exception
/// message is appended to <c>ParsingErrors</c>); true otherwise.
/// </returns>
public override bool PrepareOptions()
{
  if (!PrepareOutputDirectory())
  {
    return false;
  }

  try
  {
    // Open and immediately dispose each reader; only the ability to open the file matters here.
    using (SAMFactory.GetReader(this.NormalFile))
    {
    }

    using (SAMFactory.GetReader(this.TumorFile))
    {
    }
  }
  catch (Exception ex)
  {
    ParsingErrors.Add(ex.Message);
    return false;
  }

  if (this.ThreadCount >= 2)
  {
    Console.WriteLine("Checking chromosome names for thread mode ...");

    // In thread mode the names fall back to those read from the normal file.
    if (this.ChromosomeNames == null || this.ChromosomeNames.Count == 0)
    {
      this.ChromosomeNames = SAMUtils.GetChromosomes(this.NormalFile);
    }

    foreach (var name in this.ChromosomeNames)
    {
      Console.WriteLine(name);
    }
  }
  else if (this.ChromosomeNames != null && this.ChromosomeNames.Count > 0)
  {
    Console.Out.WriteLine("#mpileup chromosome names: " + this.ChromosomeNames.Merge(","));
  }

  return true;
}
/// <summary>
/// Streams the alignment file options.InputFile, adds each retained read to a PileupCountList and
/// writes every finished position that passes the Fisher exact test and the minimum alternative
/// allele threshold to options.OutputFile as a record under a "##fileformat=VCFv4.2" header.
/// When options.ExportIgvScript is set, an IGV script is also written to options.OutputFile + ".igv".
/// </summary>
/// <remarks>
/// A read is skipped when its sequence is shorter than options.MinimumReadLength, when its flag has
/// SAMFlags.UnmappedQuery, or when its CIGAR contains 'I' or 'D'. Reverse-strand reads store the
/// reverse-complemented sequence on the SAMAlignedItem. Cancellation is polled every 100 lines and
/// progress is reported every 100000 lines.
/// NOTE(review): the miRNA range lookup keys srmap by sam.Locations[0].Seqname (the read just added),
/// not by fin.Chromosome - confirm the two always agree for the positions returned by pc.Add.
/// NOTE(review): nothing after the read loop asks pc for positions it may still hold - confirm whether
/// PileupCountList needs a final flush.
/// </remarks>
/// <returns>A single-element array holding options.OutputFile.</returns>
/// <exception cref="UserTerminatedException">Thrown when Progress.IsCancellationPending() returns true.</exception>
public override IEnumerable <string> Process() { PileupCountList pc = new PileupCountList(); var format = options.GetSAMFormat(); var cm = new SmallRNACountMap(options.CountFile); var srItems = SequenceRegionUtils.GetSequenceRegions(options.CoordinateFile, "miRNA", options.BedAsGtf); srItems.ForEach(m => { m.Seqname = m.Seqname.StringAfter("chr"); }); var srmap = srItems.GroupBy(m => m.Seqname).ToDictionary(m => m.Key, m => m.ToList()); StreamWriter swScript = null; try { if (options.ExportIgvScript) { swScript = new StreamWriter(options.OutputFile + ".igv"); swScript.WriteLine("snapshotDirectory {0}", Path.GetDirectoryName(options.OutputFile).Replace('\\', '/')); } using (StreamWriter sw = new StreamWriter(options.OutputFile)) { sw.WriteLine(@"##fileformat=VCFv4.2 ##fileDate={0:yyyyMMdd} ##source={1} ##phasing=partial ##INFO=<ID=NS,Number=1,Type=Integer,Description=""Number of Samples With Data""> ##INFO=<ID=DP,Number=1,Type=Integer,Description=""Total Depth""> ##INFO=<ID=AF,Number=A,Type=Float,Description=""Allele Frequency""> ##INFO=<ID=FP,Number=1,Type=Float,Description=""Fisher Exact Test P-Value""> ##INFO=<ID=MN,Number=.,Type=String,Description=""miRNA name contains this position""> ##FILTER=<ID=FisherET,Description=""Fisher exact test Pvalue less than {2}""> ##FILTER=<ID=AltAlleFreq,Description=""Alternative allele frequency less than {3}""> ##FILTER=<ID=notMiRNA,Description=""Position not located in miRNA locus""> ##FORMAT=<ID=DP,Number=1,Type=Integer,Description=""Read Depth""> ##FORMAT=<ID=AD,Number=1,Type=Integer,Description=""Allelic Depth""> #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT {4}", DateTime.Now, "PileupCountBuilder", options.FisherPValue, options.MinimumAlternativeAlleleFrequency, Path.GetFileNameWithoutExtension(options.InputFile)); using (var sr = SAMFactory.GetReader(options.InputFile, true)) { int count = 0; string line; while ((line = sr.ReadLine()) != null) { count++; if (count % 100 == 0) { if (Progress.IsCancellationPending()) { 
throw new UserTerminatedException(); } } if (count % 100000 == 0) { Progress.SetMessage("{0} reads processed", count); } var parts = line.Split('\t'); var qname = parts[SAMFormatConst.QNAME_INDEX]; var seq = parts[SAMFormatConst.SEQ_INDEX]; //too short if (seq.Length < options.MinimumReadLength) { continue; } SAMFlags flag = (SAMFlags)int.Parse(parts[SAMFormatConst.FLAG_INDEX]); //unmatched if (flag.HasFlag(SAMFlags.UnmappedQuery)) { continue; } var cigar = parts[SAMFormatConst.CIGAR_INDEX]; //insertion/deletion if (cigar.Any(m => m == 'I' || m == 'D')) { continue; } var sam = new SAMAlignedItem() { Qname = qname, }; bool isReversed = flag.HasFlag(SAMFlags.QueryOnReverseStrand); char strand; if (isReversed) { strand = '-'; sam.Sequence = SequenceUtils.GetReverseComplementedSequence(seq); } else { strand = '+'; sam.Sequence = seq; } var loc = new SAMAlignedLocation(sam) { Seqname = parts[SAMFormatConst.RNAME_INDEX].StringAfter("chr"), Start = int.Parse(parts[SAMFormatConst.POS_INDEX]), Strand = strand, Cigar = parts[SAMFormatConst.CIGAR_INDEX], MismatchPositions = format.GetMismatchPositions(parts), NumberOfMismatch = format.GetNumberOfMismatch(parts), Sequence = seq }; loc.ParseEnd(sam.Sequence); sam.AddLocation(loc); if (format.HasAlternativeHits) { format.ParseAlternativeHits(parts, sam); } var finished = pc.Add(sam, cm.GetCount(sam.Qname)); if (null == finished || 0 == finished.Count) { continue; } foreach (var fin in finished) { //if (fin.Chromosome.Equals("1") && fin.Position == 5160725) //{ // Console.WriteLine(fin); //} var ft = fin.FisherExactTest(); if (ft.PValue <= options.FisherPValue) { var total = fin.Sum(m => m.Value); var minallele = total * options.MinimumAlternativeAlleleFrequency; if (ft.Sample2.Failed >= minallele) { List <GtfItem> srs; List <string> ranges = new List <string>(); if (srmap.TryGetValue(sam.Locations[0].Seqname, out srs)) { foreach (var seqr in srs) { if (seqr.Contains(fin.Position)) { ranges.Add(seqr.GetNameLocation()); } } } var 
alter = (from r in fin where r.Key != fin.Reference orderby r.Key select r).ToList(); var str = string.Format("{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}\tNS={7};DP={8};AF={9};FP={10:0.##E0}{11}\tDP:AD\t{12}:{13},{14}", fin.Chromosome, fin.Position, ".", fin.Reference, (from r in alter select r.Key.ToString()).Merge(","), 0, ranges.Count == 0 ? "notMiRNA" : "PASS", 1, total, (from r in alter select string.Format("{0:0.###}", r.Value * 1.0 / total)).Merge(","), ft.PValue, ranges.Count == 0 ? "" : ";" + ranges.Merge(","), total, ft.Sample2.Succeed, (from r in alter select r.Value.ToString()).Merge(",")); sw.WriteLine(str); //Console.WriteLine(str); if (swScript != null && ranges.Count > 0) { swScript.WriteLine(@"goto {0}:{1} sort position snapshot {0}_{2}_{1}.png", fin.Chromosome, fin.Position, ranges[0].Replace('(', '_').Replace(')', '_').Replace(':', '_')); } } } } finished.Clear(); } } } } finally { if (swScript != null) { swScript.Close(); } } return(new string[] { options.OutputFile }); }
/// <summary>
/// Parses an alignment file and groups the mapped queries by (renamed) reference name.
/// </summary>
/// <param name="fileName">Alignment file handed to SAMFactory.GetReader.</param>
/// <returns>
/// One <c>ChromosomeCountSlimItem</c> per accepted reference name, in order of first appearance,
/// each holding its distinct queries ordered by query name.
/// </returns>
/// <remarks>
/// When options.CategoryMapFile exists it is loaded into <c>nameMap</c> first. Unmapped reads and
/// references rejected by options.ChromosomePattern are skipped. When options.PreferPrefix is set,
/// a query that hits at least one reference starting with that prefix keeps only those references;
/// items left without queries are dropped from the result.
/// </remarks>
/// <exception cref="UserTerminatedException">Thrown when cancellation is pending (checked every 1000 lines).</exception>
public List<ChromosomeCountSlimItem> Build(string fileName)
{
  if (File.Exists(options.CategoryMapFile))
  {
    Progress.SetMessage("Reading name map file " + options.CategoryMapFile + " ...");
    nameMap = new MapItemReader(0, 1).ReadFromFile(options.CategoryMapFile).ToDictionary(m => m.Key, m => m.Value.Value);
  }

  var result = new List<ChromosomeCountSlimItem>();
  var queries = new Dictionary<string, SAMChromosomeItem>();
  var chromosomes = new Dictionary<string, ChromosomeCountSlimItem>();

  Func<string, bool> acceptChromosome;
  if (string.IsNullOrEmpty(options.ChromosomePattern))
  {
    acceptChromosome = m => true;
  }
  else
  {
    var chromosomeRegex = new Regex(options.ChromosomePattern);
    acceptChromosome = m => chromosomeRegex.IsMatch(m);
  }

  Progress.SetMessage("Parsing alignment file " + fileName + " ...");
  using (var sr = SAMFactory.GetReader(fileName, true))
  {
    int count = 0;
    int waitingcount = 0;
    string line;
    while ((line = sr.ReadLine()) != null)
    {
      if (count % 1000 == 0 && Progress.IsCancellationPending())
      {
        throw new UserTerminatedException();
      }

      if (count % 100000 == 0 && count > 0)
      {
        Progress.SetMessage("{0} candidates from {1} reads", waitingcount, count);
      }

      count++;

      var parts = line.Split('\t');

      // Skip unmapped reads.
      SAMFlags flag = (SAMFlags)int.Parse(parts[SAMFormatConst.FLAG_INDEX]);
      if (flag.HasFlag(SAMFlags.UnmappedQuery))
      {
        continue;
      }

      var seqname = GetName(parts[SAMFormatConst.RNAME_INDEX]);
      if (!acceptChromosome(seqname))
      {
        continue;
      }

      var qname = parts[SAMFormatConst.QNAME_INDEX];
      SAMChromosomeItem query;
      if (!queries.TryGetValue(qname, out query))
      {
        query = new SAMChromosomeItem();
        query.Qname = qname;
        queries[qname] = query;

        if (options.KeepSequence)
        {
          // The sequence is taken from the first accepted record of the query only.
          query.Sequence = parts[SAMFormatConst.SEQ_INDEX];
          if (flag.HasFlag(SAMFlags.QueryOnReverseStrand))
          {
            query.Sequence = SequenceUtils.GetReverseComplementedSequence(query.Sequence);
          }
        }
      }

      query.Chromosomes.Add(seqname);

      ChromosomeCountSlimItem item;
      if (!chromosomes.TryGetValue(seqname, out item))
      {
        item = new ChromosomeCountSlimItem();
        item.Names.Add(seqname);
        chromosomes[seqname] = item;
        result.Add(item);
      }

      item.Queries.Add(query);
      waitingcount++;
    }

    Progress.SetMessage("Finally, there are {0} candidates from {1} reads", waitingcount, count);
  }

  // A query can hit the same reference several times; collapse duplicates on both sides.
  foreach (var query in queries.Values)
  {
    query.Chromosomes = query.Chromosomes.Distinct().OrderBy(m => m).ToList();
  }

  foreach (var sam in chromosomes.Values)
  {
    sam.Queries = sam.Queries.Distinct().OrderBy(m => m.Qname).ToList();
  }

  if (!string.IsNullOrEmpty(options.PreferPrefix))
  {
    foreach (var query in queries.Values)
    {
      if (!query.Chromosomes.Any(l => l.StartsWith(options.PreferPrefix)))
      {
        continue;
      }

      // The query hits a preferred reference, so drop its NON-preferred hits.
      // The previous predicate selected the preferred references themselves, which removed
      // exactly the hits the Any(...) guard above is meant to favour.
      var chroms = query.Chromosomes.Where(l => !l.StartsWith(options.PreferPrefix)).ToArray();
      foreach (var chrom in chroms)
      {
        chromosomes[chrom].Queries.Remove(query);
        query.Chromosomes.Remove(chrom);
      }
    }

    result.RemoveAll(l => l.Queries.Count == 0);
  }

  return result;
}
/// <summary>
/// Writes the reads of an alignment file to a FASTQ target, one record per query name, skipping
/// names that are excluded, already written, or rejected by <c>Filter</c>.
/// </summary>
/// <param name="sourceFile">Alignment file handed to SAMFactory.GetReader.</param>
/// <param name="targetFile">Output path; a name ending in ".gz" (after ToLower) is passed as the flag to StreamUtils.GetWriter.</param>
/// <param name="exceptQueryNames">Query names that must not be written.</param>
/// <param name="countFile">Optional count file; when it exists a "<paramref name="targetFile"/>.dupcount" file is written as well.</param>
/// <returns>The number of reads written.</returns>
/// <exception cref="UserTerminatedException">Thrown when cancellation is pending (checked every 100000 lines).</exception>
public int Extract(string sourceFile, string targetFile, IEnumerable<string> exceptQueryNames, string countFile)
{
  var extracted = 0;

  // Starts as the exclusion list and grows with every name that gets written.
  var seen = new HashSet<string>(exceptQueryNames);

  var counts = new SmallRNACountMap();
  StreamWriter countWriter = null;
  if (File.Exists(countFile))
  {
    // Re-key the counts by the name with everything from the NTA tag onward removed.
    var loaded = new SmallRNACountMap(countFile);
    foreach (var entry in loaded.Counts)
    {
      counts.Counts[entry.Key.StringBefore(SmallRNAConsts.NTA_TAG)] = entry.Value;
    }

    countWriter = new StreamWriter(targetFile + ".dupcount");
  }

  try
  {
    using (var writer = StreamUtils.GetWriter(targetFile, targetFile.ToLower().EndsWith(".gz")))
    using (var reader = SAMFactory.GetReader(sourceFile, true))
    {
      var lineNumber = 0;
      string record;
      while ((record = reader.ReadLine()) != null)
      {
        lineNumber++;
        if (lineNumber % 100000 == 0)
        {
          Progress.SetMessage("{0} reads", lineNumber);
          if (Progress.IsCancellationPending())
          {
            throw new UserTerminatedException();
          }
        }

        var item = SAMUtils.Parse<SAMItemSlim>(record);
        item.Qname = item.Qname.StringBefore(SmallRNAConsts.NTA_TAG);

        if (seen.Contains(item.Qname) || (Filter != null && !Filter.Accept(item)))
        {
          continue;
        }

        seen.Add(item.Qname);
        item.WriteFastq(writer);

        if (countWriter != null)
        {
          countWriter.WriteLine("{0}\t{1}", item.Qname, counts.Counts[item.Qname]);
        }

        extracted++;
      }
    }
  }
  finally
  {
    if (countWriter != null)
    {
      countWriter.Close();
    }
  }

  return extracted;
}
/// <summary>
/// Reads an alignment file and builds one item of type <typeparamref name="T"/> for every record
/// accepted by <c>_filter</c>.
/// </summary>
/// <param name="fileName">Alignment file handed to SAMFactory.GetReader.</param>
/// <param name="totalQueries">
/// Receives one QueryInfo per record read. Mismatch and Length are filled only for records that
/// pass the flag filter.
/// </param>
/// <returns>The accepted items, in file order.</returns>
/// <exception cref="UserTerminatedException">Thrown when cancellation is pending (checked every 1000 lines).</exception>
protected override List<T> DoBuild<T>(string fileName, out List<QueryInfo> totalQueries)
{
  var accepted = new List<T>();
  _format = _options.GetSAMFormat();
  totalQueries = new List<QueryInfo>();

  using (var reader = SAMFactory.GetReader(fileName, true))
  {
    int readCount = 0;
    int acceptedCount = 0;
    string record;
    while ((record = reader.ReadLine()) != null)
    {
      readCount++;
      if (readCount % 1000 == 0 && Progress.IsCancellationPending())
      {
        throw new UserTerminatedException();
      }

      var fields = record.Split('\t');
      var qname = fields[SAMFormatConst.QNAME_INDEX];

      // Every record is registered in totalQueries, whether or not it is accepted below.
      var info = new QueryInfo(qname);
      totalQueries.Add(info);

      SAMFlags flag = (SAMFlags)int.Parse(fields[SAMFormatConst.FLAG_INDEX]);
      if (!_filter.AcceptFlags(flag))
      {
        continue;
      }

      var mismatchCount = _format.GetNumberOfMismatch(fields);
      var seq = fields[SAMFormatConst.SEQ_INDEX];
      info.Mismatch = mismatchCount;
      info.Length = seq.Length;

      // Same evaluation order as separate checks: mismatch, then query name, then length.
      if (!_filter.AcceptMismatch(mismatchCount) || !_filter.AcceptQueryName(qname) || !_filter.AcceptLength(seq.Length))
      {
        continue;
      }

      var cigar = fields[SAMFormatConst.CIGAR_INDEX];
      if (!_filter.AcceptCigar(cigar))
      {
        continue;
      }

      var seqname = fields[SAMFormatConst.RNAME_INDEX].StringAfter("chr");
      var start = int.Parse(fields[SAMFormatConst.POS_INDEX]);
      var end = SAMUtils.ParseEnd(start, cigar);
      bool isReversed = flag.HasFlag(SAMFlags.QueryOnReverseStrand);
      char strand = isReversed ? '-' : '+';

      // NOTE(review): no explicit sam.AddLocation(loc) call is made in this method - confirm that
      // the SAMAlignedLocation constructor attaches the location to its parent item.
      var sam = new T();
      var loc = new SAMAlignedLocation(sam);
      loc.Seqname = seqname;
      loc.Start = start;
      loc.End = end;
      loc.Strand = strand;

      if (!_filter.AcceptLocus(loc))
      {
        continue;
      }

      if (isReversed)
      {
        seq = SequenceUtils.GetReverseComplementedSequence(seq);
      }

      sam.Qname = qname;
      sam.Sequence = seq;

      loc.AlignmentScore = _format.GetAlignmentScore(fields);
      loc.Cigar = cigar;
      loc.NumberOfMismatch = mismatchCount;
      loc.MismatchPositions = _format.GetMismatchPositions(fields);

      if (_format.HasAlternativeHits)
      {
        _format.ParseAlternativeHits(fields, sam);
      }

      accepted.Add(sam);

      acceptedCount++;
      if (acceptedCount % 100 == 0)
      {
        Progress.SetMessage("{0} feature reads from {1} reads", acceptedCount, readCount);
      }
    }
  }

  return accepted;
}
/// <summary>
/// Converts a paired-end alignment file into two FASTQ files (".1" and ".2" inserted before the
/// output extension). Reads whose mate never shows up are written to a third ".orphan" file.
/// </summary>
/// <returns>The two paired output file names; the orphan file is not included.</returns>
/// <exception cref="UserTerminatedException">Thrown when cancellation is pending (checked every 100000 lines).</exception>
private IEnumerable<string> DoPairEndProcess()
{
  // Reads still waiting for their mate, keyed by query name.
  var pending = new Dictionary<string, SAMItemSlim>();

  var extension = Path.GetExtension(options.OutputFile);
  var output1 = Path.ChangeExtension(options.OutputFile, ".1" + extension);
  var output2 = Path.ChangeExtension(options.OutputFile, ".2" + extension);

  // Query names whose pair has already been written.
  var completed = new HashSet<string>();

  using (var sw1 = new StreamWriter(output1))
  using (var sw2 = new StreamWriter(output2))
  {
    // Indexed directly by Pos; slot 0 is unused.
    var writers = new[] { null, sw1, sw2 };

    using (var reader = SAMFactory.GetReader(options.InputFile, options.Samtools, true))
    {
      var lineNumber = 0;
      string record;
      while ((record = reader.ReadLine()) != null)
      {
        lineNumber++;
        if (lineNumber % 100000 == 0)
        {
          Progress.SetMessage("{0} reads", lineNumber);
          if (Progress.IsCancellationPending())
          {
            throw new UserTerminatedException();
          }
        }

        var ss = LineToPairedSamItem(record);
        if (completed.Contains(ss.Qname))
        {
          continue;
        }

        SAMItemSlim mate;
        if (!pending.TryGetValue(ss.Qname, out mate))
        {
          pending[ss.Qname] = ss;
          continue;
        }

        // Same Pos as the read already waiting: not its mate, so ignore this record.
        if (mate.Pos == ss.Pos)
        {
          continue;
        }

        ss.WriteFastq(writers[ss.Pos], true);
        mate.WriteFastq(writers[mate.Pos], true);
        completed.Add(ss.Qname);
        pending.Remove(ss.Qname);
      }
    }

    if (pending.Count > 0)
    {
      var output3 = Path.ChangeExtension(options.OutputFile, ".orphan" + extension);
      using (var sw3 = new StreamWriter(output3))
      {
        foreach (var orphan in pending.Values)
        {
          orphan.WriteFastq(sw3, true);
        }
      }
    }
  }

  return new[] { output1, output2 };
}
/// <summary>
/// Reads an alignment file and builds one item of type <typeparamref name="T"/> per record that is
/// mapped, within the mismatch limit, accepted by AcceptQueryName and within the read-length range
/// of <c>_options</c>.
/// </summary>
/// <param name="fileName">Alignment file handed to SAMFactory.GetReader.</param>
/// <param name="totalQueries">
/// Receives one QueryInfo per record read. Mismatch, Length and NoPenaltyMutation are filled only
/// for mapped records.
/// </param>
/// <returns>The accepted items, in file order, each carrying a single parsed location.</returns>
/// <exception cref="UserTerminatedException">Thrown when cancellation is pending (checked every 1000 lines).</exception>
protected override List<T> DoBuild<T>(string fileName, out List<QueryInfo> totalQueries)
{
  var candidates = new List<T>();
  _format = _options.GetSAMFormat();
  totalQueries = new List<QueryInfo>();

  using (var reader = SAMFactory.GetReader(fileName, true))
  {
    int readCount = 0;
    int candidateCount = 0;
    string record;
    while ((record = reader.ReadLine()) != null)
    {
      if (readCount % 1000 == 0 && Progress.IsCancellationPending())
      {
        throw new UserTerminatedException();
      }

      if (readCount > 0 && readCount % 100000 == 0)
      {
        Progress.SetMessage("{0} candidates from {1} reads", candidateCount, readCount);
      }

      readCount++;

      var qname = record.StringBefore("\t");
      var info = new QueryInfo(qname);
      totalQueries.Add(info);

      var fields = record.Split('\t');

      // Skip unmapped reads.
      SAMFlags flag = (SAMFlags)int.Parse(fields[SAMFormatConst.FLAG_INDEX]);
      if (flag.HasFlag(SAMFlags.UnmappedQuery))
      {
        continue;
      }

      var mismatchCount = _format.GetNumberOfMismatch(fields);
      var seq = fields[SAMFormatConst.SEQ_INDEX];
      info.Mismatch = mismatchCount;
      info.Length = seq.Length;
      info.NoPenaltyMutation = 0;

      if (_options.T2cAsNoPenaltyMutation)
      {
        // Intentionally empty: no adjustment is implemented for this option here.
      }

      // Too many mismatches.
      if (mismatchCount > _options.MaximumMismatch)
      {
        continue;
      }

      if (!AcceptQueryName(qname))
      {
        continue;
      }

      // Too short or too long.
      if (seq.Length < _options.MinimumReadLength || seq.Length > _options.MaximumReadLength)
      {
        continue;
      }

      var cigar = fields[SAMFormatConst.CIGAR_INDEX];

      bool isReversed = flag.HasFlag(SAMFlags.QueryOnReverseStrand);
      char strand = isReversed ? '-' : '+';
      if (isReversed)
      {
        seq = SequenceUtils.GetReverseComplementedSequence(seq);
      }

      var score = _format.GetAlignmentScore(fields);

      var sam = new T();
      sam.Qname = qname;
      sam.Sequence = seq;

      var seqname = fields[SAMFormatConst.RNAME_INDEX];
      var loc = new SAMAlignedLocation(sam);
      loc.Seqname = seqname;
      loc.Start = int.Parse(fields[SAMFormatConst.POS_INDEX]);
      loc.Strand = strand;
      loc.Cigar = cigar;
      loc.NumberOfMismatch = mismatchCount;
      loc.AlignmentScore = score;
      loc.MismatchPositions = _format.GetMismatchPositions(fields);

      loc.ParseEnd(sam.Sequence);
      sam.AddLocation(loc);

      if (_format.HasAlternativeHits)
      {
        _format.ParseAlternativeHits(fields, sam);
      }

      candidates.Add(sam);
      candidateCount++;
    }

    Progress.SetMessage("Finally, there are {0} candidates from {1} reads", candidateCount, readCount);
  }

  return candidates;
}
/// <summary>
/// Builds a tab-delimited table with one row per sequence and one column per count file.
/// </summary>
/// <remarks>
/// For every count file (sorted by Name) the mapped query names are collected from the alignment
/// file. Each query is then looked up in the file's AdditionalFile map and stored under its
/// Information value with the parsed Value as count. Rows are ordered by descending total count
/// over all files; a sequence missing from a file is written as 0.
/// </remarks>
/// <returns>A single-element array holding the full path of the output file.</returns>
/// <exception cref="UserTerminatedException">Thrown when cancellation is pending (checked every 1000 lines).</exception>
public override IEnumerable<string> Process()
{
  var countFiles = options.GetCountFiles();
  countFiles.Sort((m1, m2) => m1.Name.CompareTo(m2.Name));

  // File name -> (sequence -> count).
  var countMap = new Dictionary<string, Dictionary<string, int>>();

  for (int i = 0; i < countFiles.Count; i++)
  {
    var file = countFiles[i];
    Progress.SetMessage("Reading {0}/{1}: {2} ...", i + 1, countFiles.Count, file.File);

    var mappedNames = new HashSet<string>();
    using (var reader = SAMFactory.GetReader(file.File, true))
    {
      int lineNumber = 0;
      string record;
      while ((record = reader.ReadLine()) != null)
      {
        lineNumber++;
        if (lineNumber % 1000 == 0 && Progress.IsCancellationPending())
        {
          throw new UserTerminatedException();
        }

        var fields = record.Split('\t');

        // Only mapped reads contribute a query name.
        SAMFlags flag = (SAMFlags)int.Parse(fields[SAMFormatConst.FLAG_INDEX]);
        if (!flag.HasFlag(SAMFlags.UnmappedQuery))
        {
          mappedNames.Add(fields[SAMFormatConst.QNAME_INDEX]);
        }
      }
    }

    var perFile = new Dictionary<string, int>();
    countMap[file.Name] = perFile;

    var lookup = new MapItemReader(0, 1, informationIndex: 2).ReadFromFile(file.AdditionalFile);
    foreach (var name in mappedNames)
    {
      var entry = lookup[name];
      perFile[entry.Information] = int.Parse(entry.Value);
    }

    Progress.SetMessage("{0} reads mapped.", mappedNames.Count);
  }

  var uniques = countMap.Values
    .SelectMany(c => c.Keys)
    .Distinct()
    .ToArray();

  var uniqueCounts = uniques
    .Select(seq => new
    {
      Sequence = seq,
      Count = countMap.Values.Where(c => c.ContainsKey(seq)).Select(c => c[seq]).Sum()
    })
    .OrderByDescending(m => m.Count)
    .ToArray();

  using (var writer = new StreamWriter(options.OutputFile))
  {
    writer.WriteLine("Sequence\t" + (from cf in countFiles select cf.Name).Merge("\t"));

    foreach (var row in uniqueCounts)
    {
      writer.Write(row.Sequence);
      foreach (var cf in countFiles)
      {
        int value;
        if (countMap[cf.Name].TryGetValue(row.Sequence, out value))
        {
          writer.Write("\t{0}", value);
        }
        else
        {
          writer.Write("\t0");
        }
      }

      writer.WriteLine();
    }
  }

  Progress.End();

  return new string[] { Path.GetFullPath(options.OutputFile) };
}
/// <summary>
/// Collects into <c>rangeQueries</c> the query names returned by coordinate-restricted readers,
/// then delegates the actual build to the base implementation.
/// </summary>
/// <remarks>
/// When both "<c>CoordinateFile</c>.miss1" and "<c>CoordinateFile</c>.miss0" exist and
/// options.T2cAsNoPenaltyMutation is false, the names come from two passes: every record of the
/// ".miss1" pass, plus the records of the ".miss0" pass that contain "NM:i:0". Otherwise a single
/// pass with options.CoordinateFile is used. Lines starting with "@" are always skipped.
/// </remarks>
/// <param name="fileName">Alignment file handed to SAMFactory.GetReader and to the base build.</param>
/// <param name="totalQueries">Filled by the base implementation.</param>
/// <returns>The result of the base DoBuild call.</returns>
protected override List<T> DoBuild<T>(string fileName, out List<QueryInfo> totalQueries)
{
  Progress.SetMessage("Find queries overlapped with coordinates...");

  rangeQueries = new HashSet<string>();

  var miss1file = options.CoordinateFile + ".miss1";
  var miss0file = options.CoordinateFile + ".miss0";
  var useSplitFiles = File.Exists(miss1file) && File.Exists(miss0file) && !options.T2cAsNoPenaltyMutation;

  if (!useSplitFiles)
  {
    using (var reader = SAMFactory.GetReader(fileName, true, options.CoordinateFile))
    {
      string record;
      while ((record = reader.ReadLine()) != null)
      {
        if (!record.StartsWith("@"))
        {
          rangeQueries.Add(record.StringBefore("\t"));
        }
      }
    }
  }
  else
  {
    var miss1Queries = new HashSet<string>();
    using (var reader = SAMFactory.GetReader(fileName, true, miss1file))
    {
      string record;
      while ((record = reader.ReadLine()) != null)
      {
        if (!record.StartsWith("@"))
        {
          miss1Queries.Add(record.StringBefore("\t"));
        }
      }
    }

    Progress.SetMessage("Miss 1 queries : {0}", miss1Queries.Count);

    var miss0Queries = new HashSet<string>();
    using (var reader = SAMFactory.GetReader(fileName, true, miss0file))
    {
      string record;
      while ((record = reader.ReadLine()) != null)
      {
        // In this pass only records containing the literal "NM:i:0" text count.
        if (!record.StartsWith("@") && record.Contains("NM:i:0"))
        {
          miss0Queries.Add(record.StringBefore("\t"));
        }
      }
    }

    Progress.SetMessage("Miss 0 queries : {0}", miss0Queries.Count);

    rangeQueries.UnionWith(miss1Queries);
    rangeQueries.UnionWith(miss0Queries);

    miss1Queries.Clear();
    miss0Queries.Clear();
  }

  Progress.SetMessage("{0} queries overlaped with coordinates.", rangeQueries.Count);

  return base.DoBuild<T>(fileName, out totalQueries);
}