/// <summary>
/// Loads mapped sequence regions from an XML file.
/// </summary>
/// <param name="fileName">Path of the XML file to load.</param>
/// <returns>
/// One <see cref="SequenceRegionMapped"/> for every "region" element found
/// under the "regions" element, each linked to the aligned locations that
/// its "query" children reference.
/// </returns>
public List<SequenceRegionMapped> ReadFromFile(string fileName)
{
    XElement document = XElement.Load(fileName);

    // Lookup table used to resolve each "query" element to its aligned location.
    var locationsByKey = document.ToSAMAlignedItems().ToSAMAlignedLocationMap();

    var mappedRegions = new List<SequenceRegionMapped>();
    foreach (var regionNode in document.Element("regions").Elements("region"))
    {
        var mapped = new SequenceRegionMapped();
        mappedRegions.Add(mapped);

        mapped.Region = new SequenceRegion();
        mapped.Region.Name = regionNode.Attribute("name").Value;
        mapped.Region.ParseLocation(regionNode);

        foreach (var queryNode in regionNode.Elements("query"))
        {
            var key = SAMAlignedLocation.GetKey(
                queryNode.Attribute("qname").Value,
                queryNode.Attribute("loc").Value);

            // The indexer throws if the file references a location that was not loaded.
            var aligned = locationsByKey[key];
            mapped.AlignedLocations.Add(aligned);
            aligned.Features.Add(mapped.Region);
        }
    }

    locationsByKey.Clear();
    return mappedRegions;
}
/// <summary>
/// Loads mapped item groups from an XML file.
/// </summary>
/// <param name="fileName">Path of the XML file to load.</param>
/// <returns>
/// One <see cref="MappedItemGroup"/> per "subjectGroup" element under
/// "subjectResult"; every "subject" becomes a <see cref="MappedItem"/> and
/// every "region" a <see cref="SequenceRegionMapped"/>.
/// </returns>
public List<MappedItemGroup> ReadFromFile(string fileName)
{
    var groups = new List<MappedItemGroup>();
    XElement document = XElement.Load(fileName);

    // Lookup table used to resolve each "query" element to its aligned location.
    Dictionary<string, SAMAlignedLocation> locationsByKey = document.ToSAMAlignedItems().ToSAMAlignedLocationMap();

    foreach (XElement groupNode in document.Element("subjectResult").Elements("subjectGroup"))
    {
        var itemGroup = new MappedItemGroup();
        groups.Add(itemGroup);

        foreach (XElement subjectNode in groupNode.Elements("subject"))
        {
            var item = new MappedItem();
            itemGroup.Add(item);
            item.Name = subjectNode.Attribute("name").Value;

            foreach (XElement regionNode in subjectNode.Elements("region"))
            {
                var mapped = new SequenceRegionMapped();
                item.MappedRegions.Add(mapped);
                mapped.Region.Name = item.Name;
                mapped.Region.ParseLocation(regionNode);

                // The three attributes below are optional.
                XAttribute sequenceAttr = regionNode.Attribute("sequence");
                if (sequenceAttr != null)
                {
                    mapped.Region.Sequence = sequenceAttr.Value;
                }

                XAttribute countAttr = regionNode.Attribute("query_count_before_filter");
                if (countAttr != null)
                {
                    mapped.QueryCountBeforeFilter = int.Parse(countAttr.Value);
                }

                XAttribute pvalueAttr = regionNode.Attribute("pvalue");
                if (pvalueAttr != null)
                {
                    mapped.PValue = double.Parse(pvalueAttr.Value);
                }

                foreach (XElement queryNode in regionNode.Elements("query"))
                {
                    string key = SAMAlignedLocation.GetKey(
                        queryNode.Attribute("qname").Value,
                        queryNode.Attribute("loc").Value);

                    SAMAlignedLocation aligned = locationsByKey[key];
                    mapped.AlignedLocations.Add(aligned);
                    aligned.Features.Add(mapped.Region);
                }
            }
        }
    }

    locationsByKey.Clear();
    return groups;
}
/// <summary>
/// Reads the mapped miRNA groups from <c>options.InputFile</c> and writes a
/// tab-delimited table to <c>options.OutputFile</c> with, per group, the
/// total number of aligned locations, the perfect matches, and the numbers of
/// mismatched locations accepted by <c>reg5</c> and <c>reg3</c>.
/// </summary>
/// <param name="useless">Not used by this method.</param>
/// <returns>An array holding the output file name.</returns>
public override IEnumerable<string> Process(string useless)
{
    var groups = new MappedMirnaGroupXmlFileFormat().ReadFromFile(options.InputFile);

    using (var writer = new StreamWriter(options.OutputFile))
    {
        writer.WriteLine("miRNA\tLocation\tTotalCount\tPerfectMatch\tMiss5_2\tMiss3_3\tMissInternal");

        foreach (var mirna in groups)
        {
            var locations = mirna.GetAlignedLocations();

            // Console dump for one hard-coded miRNA.
            if (mirna.DisplayName.Equals("hsa-mir-486-5p:TCCTGTACTGAGCTGCCCCGAG"))
            {
                foreach (var dumped in locations)
                {
                    Console.WriteLine(dumped.Parent.Qname + "\t" + dumped.Strand + "\t" + dumped.MismatchPositions);
                }
            }

            int perfectCount = 0;
            int fivePrimeCount = 0;
            int threePrimeCount = 0;

            foreach (var location in locations)
            {
                if (location.NumberOfMismatch == 0)
                {
                    perfectCount++;
                    continue;
                }

                var positions = location.MismatchPositions;

                // reg5 sees the string reversed for '-' strand locations ...
                var fivePrimeView = location.Strand == '-'
                    ? new string(positions.Reverse().ToArray())
                    : positions;
                if (reg5.Match(fivePrimeView).Success)
                {
                    fivePrimeCount++;
                }

                // ... while reg3 sees it reversed for '+' strand locations.
                var threePrimeView = location.Strand == '+'
                    ? new string(positions.Reverse().ToArray())
                    : positions;
                if (reg3.Match(threePrimeView).Success)
                {
                    threePrimeCount++;
                }
            }

            writer.WriteLine("{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}",
                mirna.DisplayName,
                mirna.DisplayLocation,
                locations.Count,
                perfectCount,
                fivePrimeCount,
                threePrimeCount,
                locations.Count - perfectCount - fivePrimeCount - threePrimeCount);
        }
    }

    return new string[] { options.OutputFile };
}
/// <summary>
/// Decides whether an aligned location may be assigned to a feature location.
/// The pair must first pass <c>CheckNoPenaltyMutation</c>; it is then accepted
/// only when the overlap percentage is positive and at least
/// <c>Options.MinimumOverlapPercentage</c>.
/// </summary>
/// <param name="floc">Feature location being tested.</param>
/// <param name="sloc">Aligned read location being tested.</param>
/// <returns>The acceptance result, including the overlap percentage when it was computed.</returns>
public virtual AcceptResult AcceptLocationPair(FeatureLocation floc, SAMAlignedLocation sloc)
{
    var verdict = CheckNoPenaltyMutation(floc, sloc);
    if (verdict.Accepted)
    {
        verdict.OverlapPercentage = floc.OverlapPercentage(sloc);

        bool overlaps = verdict.OverlapPercentage > 0;
        verdict.Accepted = overlaps && verdict.OverlapPercentage >= Options.MinimumOverlapPercentage;
    }

    return verdict;
}
/// <summary>
/// Applies the base acceptance rules and then additionally requires the
/// offset of the aligned location relative to the feature location to be one
/// of <c>Options.Offsets</c>.
/// </summary>
/// <param name="floc">Feature location being tested.</param>
/// <param name="sloc">Aligned read location being tested.</param>
/// <returns>The acceptance result produced by the base class, with <c>Accepted</c> narrowed by the offset check.</returns>
public override AcceptResult AcceptLocationPair(FeatureLocation floc, SAMAlignedLocation sloc)
{
    var verdict = base.AcceptLocationPair(floc, sloc);
    if (verdict.Accepted)
    {
        verdict.Accepted = Options.Offsets.Contains(sloc.Offset(floc));
    }

    return verdict;
}
/// <summary>
/// Checks the mismatch and no-penalty-mutation limits for an aligned location.
/// When the location reports no-penalty mutations, only T-to-C changes (same
/// strand as the feature) or A-to-G changes (opposite strand) keep that
/// status; every other polymorphism is counted as a real mismatch.
/// </summary>
/// <param name="floc">Feature location; only its strand is consulted.</param>
/// <param name="sloc">Aligned read location whose counts are validated.</param>
/// <returns>
/// A rejected result when a limit from <c>Options</c> is exceeded, otherwise
/// an accepted result carrying the recomputed counts.
/// </returns>
private AcceptResult CheckNoPenaltyMutation(FeatureLocation floc, SAMAlignedLocation sloc)
{
    if (sloc.NumberOfNoPenaltyMutation <= 0)
    {
        // No no-penalty mutation reported: only the plain mismatch limit applies.
        if (sloc.NumberOfMismatch > Options.MaximumMismatch)
        {
            return new AcceptResult { Accepted = false };
        }

        return new AcceptResult
        {
            Accepted = true,
            NumberOfMismatch = sloc.NumberOfMismatch,
            NumberOfNoPenaltyMutation = 0
        };
    }

    var polymorphisms = sloc.GetGsnapMismatches();

    // The non-penalty mutation has to be T2C on the feature strand, i.e. A2G on the opposite one.
    int realMismatches = floc.Strand == sloc.Strand
        ? polymorphisms.Count(m => m.RefAllele != 'T' || m.SampleAllele != 'C')
        : polymorphisms.Count(m => m.RefAllele != 'A' || m.SampleAllele != 'G');

    int noPenaltyCount = sloc.NumberOfMismatch + sloc.NumberOfNoPenaltyMutation - realMismatches;

    bool withinLimits = realMismatches <= Options.MaximumMismatch
        && noPenaltyCount <= Options.MaximumNoPenaltyMutationCount;
    if (!withinLimits)
    {
        return new AcceptResult { Accepted = false };
    }

    return new AcceptResult
    {
        Accepted = true,
        NumberOfMismatch = realMismatches,
        NumberOfNoPenaltyMutation = noPenaltyCount
    };
}
/// <summary>
/// Rejects reads that are shorter than <c>Options.MinimumReadLengthForLongRNA</c>
/// or have more mismatches than <c>Options.MaximumMismatchForLongRNA</c>;
/// everything else is delegated to the base implementation.
/// </summary>
/// <param name="floc">Feature location being tested.</param>
/// <param name="sloc">Aligned read location being tested.</param>
/// <returns>A rejected result for filtered reads, otherwise the base class result.</returns>
public override AcceptResult AcceptLocationPair(FeatureLocation floc, SAMAlignedLocation sloc)
{
    bool rejected = sloc.Parent.Sequence.Length < Options.MinimumReadLengthForLongRNA
        || sloc.NumberOfMismatch > Options.MaximumMismatchForLongRNA;

    if (rejected)
    {
        return new AcceptResult { Accepted = false };
    }

    return base.AcceptLocationPair(floc, sloc);
}
/// <summary>
/// Tests whether an aligned location overlaps at least one known feature on
/// the same sequence and, for every overlapping feature, records the location
/// in that feature's <c>SamLocations</c>.
/// </summary>
/// <remarks>
/// The feature list of the most recently seen sequence name is cached in
/// <c>lastFeatures</c>/<c>lastSeqname</c>. The scan stops at the first feature
/// whose <c>Start</c> lies beyond <c>loc.End</c>, so the list is presumably
/// ordered by start position (confirm against the code that fills
/// <c>featureMap</c>).
/// </remarks>
/// <param name="loc">Aligned location to test.</param>
/// <returns><c>true</c> when at least one feature overlaps the location.</returns>
public override bool AcceptLocus(SAMAlignedLocation loc)
{
    if (!loc.Seqname.Equals(lastSeqname))
    {
        // TryGetValue overwrites lastFeatures with null on a miss. Record the
        // sequence name in that case too, so the cached name and list always
        // belong together. Previously a miss left the old name next to a null
        // list, and the next read on the old sequence crashed in the loop below.
        featureMap.TryGetValue(loc.Seqname, out lastFeatures);
        lastSeqname = loc.Seqname;
    }

    if (lastFeatures == null)
    {
        // No features are known for this sequence.
        return false;
    }

    var result = false;
    foreach (var feature in lastFeatures)
    {
        if (feature.End < loc.Start)
        {
            continue;
        }

        if (feature.Start > loc.End)
        {
            break;
        }

        if (feature.Overlap(loc, this.minOverlapPercentage))
        {
            result = true;

            var samloc = new FeatureSamLocation(feature);
            samloc.SamLocation = loc;
            feature.SamLocations.Add(samloc);
        }
    }

    return result;
}
/// <summary>
/// Builds aligned items from a tab-delimited alignment file in which every
/// read starts with a line beginning with ">" and is followed by one line per
/// reported match.
/// </summary>
/// <typeparam name="T">Aligned item type to create for each accepted read.</typeparam>
/// <param name="fileName">Alignment file to read.</param>
/// <param name="totalQueries">
/// Receives a <see cref="QueryInfo"/> for every read that was not skipped by
/// the NTA filter, whether or not it ends up in the result.
/// </param>
/// <returns>The reads that kept at least one location after filtering.</returns>
/// <exception cref="UserTerminatedException">Cancellation was requested through <c>Progress</c>.</exception>
protected override List<T> DoBuild<T>(string fileName, out List<QueryInfo> totalQueries)
{
    var candidates = new List<T>();
    totalQueries = new List<QueryInfo>();

    using (var reader = StreamUtils.GetReader(fileName))
    {
        int readCount = 0;
        int candidateCount = 0;

        string header;
        while ((header = reader.ReadLine()) != null)
        {
            // Only read header lines are handled here; match lines are consumed below.
            if (!header.StartsWith(">"))
            {
                continue;
            }

            if (readCount % 1000 == 0 && Progress.IsCancellationPending())
            {
                throw new UserTerminatedException();
            }

            if (readCount > 0 && readCount % 100000 == 0)
            {
                Progress.SetMessage("{0} candidates from {1} reads", candidateCount, readCount);
            }

            readCount++;

            var fields = header.Split('\t');
            var qname = fields[3];
            bool hasNTATag = qname.HasNTATag();
            bool hasNTA = qname.HasNTA();

            if (_options.IgnoreNTA && hasNTA)
            {
                continue;
            }

            var info = new QueryInfo(qname);
            totalQueries.Add(info);

            int matchCount = int.Parse(fields[1]);
            if (matchCount == 0)
            {
                continue;
            }

            // Drop the leading '>' of the header.
            var seq = fields[0].Substring(1);
            info.Length = seq.Length;

            // Skip reads containing 'N', and reads that are too short or too long.
            bool unusable = seq.Contains('N')
                || seq.Length < _options.MinimumReadLength
                || seq.Length > _options.MaximumReadLength;
            if (unusable)
            {
                continue;
            }

            var sam = new T() { Qname = qname, Sequence = seq };

            for (int matchIndex = 0; matchIndex < matchCount; matchIndex++)
            {
                string matchLine = reader.ReadLine();
                if (string.IsNullOrWhiteSpace(matchLine))
                {
                    // Fewer match lines than announced: discard the whole read.
                    sam.ClearLocations();
                    break;
                }

                var matchFields = matchLine.Split('\t');
                var genome = matchFields[0].Trim();

                // '-' marks an insertion or deletion, '*' a soft clip; neither is allowed now.
                if (genome.Contains('-') || genome.Contains('*'))
                {
                    continue;
                }

                string mismatchPosition = string.Empty;
                string cigar = string.Empty;
                int mismatch;
                int mutation;
                GetMismatchPositions(seq, genome, ref mismatchPosition, ref cigar, out mutation, out mismatch);

                info.Mismatch = mismatch;

                if (mismatch > _options.MaximumMismatch || mutation > _options.MaximumNoPenaltyMutationCount)
                {
                    continue;
                }

                if (_options.IgnoreNTAAndNoPenaltyMutation && mutation > 0)
                {
                    if (hasNTA)
                    {
                        continue;
                    }

                    // For NTA-tagged reads, reject when the last '.' of the cigar is within its final three characters.
                    if (hasNTATag && cigar.LastIndexOf('.') >= cigar.Length - 3)
                    {
                        continue;
                    }
                }

                var located = locReg.Match(matchFields[2]);
                var strand = located.Groups[1].Value[0];
                var chromosome = located.Groups[2].Value;
                var first = int.Parse(located.Groups[3].Value);
                var second = int.Parse(located.Groups[4].Value);
                bool forward = strand == '+';

                var loc = new SAMAlignedLocation(sam)
                {
                    Seqname = chromosome,
                    Start = forward ? first : second,
                    End = forward ? second : first,
                    Strand = strand,
                    NumberOfMismatch = mismatch,
                    NumberOfNoPenaltyMutation = mutation,
                    Cigar = cigar,
                    MismatchPositions = mismatchPosition
                };
                sam.AddLocation(loc);
            }

            if (sam.Locations.Count == 0)
            {
                continue;
            }

            if (sam.Locations.Count > 1)
            {
                // Keep only the locations with the fewest no-penalty mutations.
                var fewest = sam.Locations.Min(m => m.NumberOfNoPenaltyMutation);
                sam.RemoveLocation(m => m.NumberOfNoPenaltyMutation > fewest);
            }

            candidates.Add(sam);
            candidateCount++;
        }

        Progress.SetMessage("Finally, there are {0} candidates from {1} reads", candidateCount, readCount);
    }

    return candidates;
}
/// <summary>
/// Reads lines from the underlying file until one describes a mapped read
/// whose mapping quality is at least <c>_options.MinimumReadQuality</c>, and
/// converts that line into a <see cref="SAMAlignedItem"/>.
/// </summary>
/// <returns>The next accepted item, or <c>null</c> when the file is exhausted.</returns>
public SAMAlignedItem NextSAMAlignedItem()
{
    for (string line = _file.ReadLine(); line != null; line = _file.ReadLine())
    {
        var fields = line.Split('\t');
        var qname = fields[SAMFormatConst.QNAME_INDEX];
        var seq = fields[SAMFormatConst.SEQ_INDEX];
        var flag = (SAMFlags)int.Parse(fields[SAMFormatConst.FLAG_INDEX]);

        // Skip unmapped reads.
        if (flag.HasFlag(SAMFlags.UnmappedQuery))
        {
            continue;
        }

        // Skip reads below the mapping quality threshold.
        if (int.Parse(fields[SAMFormatConst.MAPQ_INDEX]) < _options.MinimumReadQuality)
        {
            continue;
        }

        var item = new SAMAlignedItem
        {
            Qname = qname,
        };

        // The item stores the reverse complement when the read mapped to the
        // reverse strand; the location below keeps the sequence as written in the file.
        bool reversed = flag.HasFlag(SAMFlags.QueryOnReverseStrand);
        char strand = reversed ? '-' : '+';
        item.Sequence = reversed ? SequenceUtils.GetReverseComplementedSequence(seq) : seq;

        var location = new SAMAlignedLocation(item)
        {
            Seqname = fields[SAMFormatConst.RNAME_INDEX],
            Start = int.Parse(fields[SAMFormatConst.POS_INDEX]),
            Strand = strand,
            Cigar = fields[SAMFormatConst.CIGAR_INDEX],
            MismatchPositions = _format.GetMismatchPositions(fields),
            NumberOfMismatch = _format.GetNumberOfMismatch(fields),
            Sequence = seq,
            Qual = fields[SAMFormatConst.QUAL_INDEX]
        };
        location.ParseEnd(item.Sequence);
        item.AddLocation(location);

        if (_format.HasAlternativeHits)
        {
            _format.ParseAlternativeHits(fields, item);
        }

        return item;
    }

    return null;
}
/// <summary>
/// Streams feature item groups from an XML file with an <see cref="XmlReader"/>.
/// The queries are read first and indexed by location key; the
/// "subjectGroup" / "subject" / "region" / "query" hierarchy under
/// "subjectResult" is then walked in document order.
/// </summary>
/// <param name="fileName">Path of the XML file to read.</param>
/// <returns>One <see cref="FeatureItemGroup"/> per "subjectGroup" element.</returns>
public virtual List<FeatureItemGroup> ReadFromFile(string fileName)
{
    var groups = new List<FeatureItemGroup>();

    using (XmlReader source = XmlReader.Create(fileName))
    {
        Progress.SetMessage("reading queries ...");
        List<SAMAlignedItem> queries = SAMAlignedItemUtils.ReadFrom(source);
        Progress.SetMessage("{0} queries read.", queries.Count);

        var locationsByKey = queries.ToSAMAlignedLocationMap();
        queries.Clear();

        Progress.SetMessage("reading subjects ...");
        source.ReadToFollowing("subjectResult");

        // Each loop enters the first matching child element and then steps through its siblings.
        for (bool hasGroup = source.ReadToDescendant("subjectGroup"); hasGroup; hasGroup = source.ReadToNextSibling("subjectGroup"))
        {
            var featureGroup = new FeatureItemGroup();
            groups.Add(featureGroup);

            for (bool hasSubject = source.ReadToDescendant("subject"); hasSubject; hasSubject = source.ReadToNextSibling("subject"))
            {
                var item = new FeatureItem();
                featureGroup.Add(item);
                item.Name = source.GetAttribute("name");

                for (bool hasRegion = source.ReadToDescendant("region"); hasRegion; hasRegion = source.ReadToNextSibling("region"))
                {
                    var fl = new FeatureLocation();
                    item.Locations.Add(fl);
                    fl.Name = item.Name;
                    fl.Seqname = source.GetAttribute("seqname");
                    fl.Start = long.Parse(source.GetAttribute("start"));
                    fl.End = long.Parse(source.GetAttribute("end"));
                    fl.Strand = source.GetAttribute("strand")[0];
                    fl.Sequence = source.GetAttribute("sequence");

                    // Optional region attributes.
                    string countText = source.GetAttribute("query_count_before_filter");
                    if (countText != null)
                    {
                        fl.QueryCountBeforeFilter = int.Parse(countText);
                    }

                    string pvalueText = source.GetAttribute("pvalue");
                    if (pvalueText != null)
                    {
                        fl.PValue = double.Parse(pvalueText);
                    }

                    for (bool hasQuery = source.ReadToDescendant("query"); hasQuery; hasQuery = source.ReadToNextSibling("query"))
                    {
                        string key = SAMAlignedLocation.GetKey(source.GetAttribute("qname"), source.GetAttribute("loc"));
                        SAMAlignedLocation aligned = locationsByKey[key];

                        FeatureSamLocation fsl = new FeatureSamLocation(fl);
                        fsl.SamLocation = aligned;
                        fsl.Offset = int.Parse(source.GetAttribute("offset"));

                        // Fall back to computing the overlap when the file does not store it.
                        string overlapText = source.GetAttribute("overlap");
                        fsl.OverlapPercentage = overlapText == null
                            ? aligned.OverlapPercentage(fl)
                            : double.Parse(overlapText);

                        string mismatchText = source.GetAttribute("nmi");
                        if (mismatchText != null)
                        {
                            fsl.NumberOfMismatch = int.Parse(mismatchText);
                        }

                        string noPenaltyText = source.GetAttribute("nnpm");
                        if (noPenaltyText != null)
                        {
                            fsl.NumberOfNoPenaltyMutation = int.Parse(noPenaltyText);
                        }
                    }
                }
            }
        }

        locationsByKey.Clear();
    }

    Progress.SetMessage("{0} subjects read.", groups.Count);
    return groups;
}
/// <summary>
/// Loads feature item groups from an XML file held fully in memory.
/// </summary>
/// <param name="fileName">Path of the XML file to load; it is also echoed to the console.</param>
/// <returns>
/// One <see cref="FeatureItemGroup"/> per "subjectGroup" element under
/// "subjectResult", with a <see cref="FeatureItem"/> per "subject" and a
/// <see cref="FeatureLocation"/> per "region".
/// </returns>
public List<FeatureItemGroup> ReadFromFile(string fileName)
{
    Console.WriteLine("read file {0} ...", fileName);

    var groups = new List<FeatureItemGroup>();
    XElement document = XElement.Load(fileName);

    // Lookup table used to resolve each "query" element to its aligned location.
    Dictionary<string, SAMAlignedLocation> locationsByKey = document.ToSAMAlignedItems().ToSAMAlignedLocationMap();

    foreach (XElement groupNode in document.Element("subjectResult").Elements("subjectGroup"))
    {
        var featureGroup = new FeatureItemGroup();
        groups.Add(featureGroup);

        foreach (XElement subjectNode in groupNode.Elements("subject"))
        {
            var item = new FeatureItem();
            featureGroup.Add(item);
            item.Name = subjectNode.Attribute("name").Value;

            foreach (XElement regionNode in subjectNode.Elements("region"))
            {
                var fl = new FeatureLocation();
                item.Locations.Add(fl);
                fl.Name = item.Name;
                fl.ParseLocation(regionNode);

                // Optional region attributes.
                XAttribute sequenceAttr = regionNode.Attribute("sequence");
                if (sequenceAttr != null)
                {
                    fl.Sequence = sequenceAttr.Value;
                }

                XAttribute countAttr = regionNode.Attribute("query_count_before_filter");
                if (countAttr != null)
                {
                    fl.QueryCountBeforeFilter = int.Parse(countAttr.Value);
                }

                XAttribute pvalueAttr = regionNode.Attribute("pvalue");
                if (pvalueAttr != null)
                {
                    fl.PValue = double.Parse(pvalueAttr.Value);
                }

                foreach (XElement queryNode in regionNode.Elements("query"))
                {
                    string key = SAMAlignedLocation.GetKey(
                        queryNode.Attribute("qname").Value,
                        queryNode.Attribute("loc").Value);
                    SAMAlignedLocation aligned = locationsByKey[key];

                    FeatureSamLocation fsl = new FeatureSamLocation(fl);
                    fsl.SamLocation = aligned;

                    // Fall back to computing the overlap when the file does not store it.
                    var overlapAttr = queryNode.FindAttribute("overlap");
                    fsl.OverlapPercentage = overlapAttr == null
                        ? aligned.OverlapPercentage(fl)
                        : double.Parse(overlapAttr.Value);

                    // "nnmp" is accepted as an alternative spelling of "nnpm".
                    var noPenaltyAttr = queryNode.FindAttribute("nnpm") ?? queryNode.FindAttribute("nnmp");
                    if (noPenaltyAttr != null)
                    {
                        fsl.NumberOfNoPenaltyMutation = int.Parse(noPenaltyAttr.Value);
                    }

                    var mismatchAttr = queryNode.FindAttribute("nmi");
                    if (mismatchAttr != null)
                    {
                        fsl.NumberOfMismatch = int.Parse(mismatchAttr.Value);
                    }
                }
            }
        }
    }

    locationsByKey.Clear();
    return groups;
}
/// <summary>
/// Reads the alignments in <c>options.InputFile</c>, feeds every accepted
/// read into a <c>PileupCountList</c> and writes the positions that pass the
/// filters to <c>options.OutputFile</c>, preceded by a VCF 4.2 style header.
/// When <c>options.ExportIgvScript</c> is set, a script is also written to
/// <c>options.OutputFile + ".igv"</c> with goto/sort/snapshot commands for
/// every reported position that falls inside a miRNA range.
/// </summary>
/// <remarks>
/// Reads shorter than <c>options.MinimumReadLength</c>, unmapped reads and
/// reads whose CIGAR contains 'I' or 'D' are skipped. A finished position is
/// reported when its Fisher exact test p-value is at most
/// <c>options.FisherPValue</c> and <c>ft.Sample2.Failed</c> reaches
/// <c>total * options.MinimumAlternativeAlleleFrequency</c>. The FILTER
/// column is "PASS" when the position lies in a miRNA range and "notMiRNA"
/// otherwise. The leading "chr" is stripped from sequence names on both the
/// coordinate and the alignment side.
/// NOTE(review): the miRNA ranges are looked up with
/// <c>sam.Locations[0].Seqname</c> (the read that caused the position to be
/// finished) while the record is written with <c>fin.Chromosome</c>; confirm
/// that these always name the same sequence.
/// NOTE(review): only positions returned by <c>pc.Add</c> are reported;
/// whether anything remains buffered in <c>pc</c> at end of input cannot be
/// told from this method and should be checked.
/// </remarks>
/// <returns>An array holding the output file name.</returns>
/// <exception cref="UserTerminatedException">Cancellation was requested through <c>Progress</c>.</exception>
public override IEnumerable <string> Process() { PileupCountList pc = new PileupCountList(); var format = options.GetSAMFormat(); var cm = new SmallRNACountMap(options.CountFile); var srItems = SequenceRegionUtils.GetSequenceRegions(options.CoordinateFile, "miRNA", options.BedAsGtf); srItems.ForEach(m => { m.Seqname = m.Seqname.StringAfter("chr"); }); var srmap = srItems.GroupBy(m => m.Seqname).ToDictionary(m => m.Key, m => m.ToList()); StreamWriter swScript = null; try { if (options.ExportIgvScript) { swScript = new StreamWriter(options.OutputFile + ".igv"); swScript.WriteLine("snapshotDirectory {0}", Path.GetDirectoryName(options.OutputFile).Replace('\\', '/')); } using (StreamWriter sw = new StreamWriter(options.OutputFile)) { sw.WriteLine(@"##fileformat=VCFv4.2 ##fileDate={0:yyyyMMdd} ##source={1} ##phasing=partial ##INFO=<ID=NS,Number=1,Type=Integer,Description=""Number of Samples With Data""> ##INFO=<ID=DP,Number=1,Type=Integer,Description=""Total Depth""> ##INFO=<ID=AF,Number=A,Type=Float,Description=""Allele Frequency""> ##INFO=<ID=FP,Number=1,Type=Float,Description=""Fisher Exact Test P-Value""> ##INFO=<ID=MN,Number=.,Type=String,Description=""miRNA name contains this position""> ##FILTER=<ID=FisherET,Description=""Fisher exact test Pvalue less than {2}""> ##FILTER=<ID=AltAlleFreq,Description=""Alternative allele frequency less than {3}""> ##FILTER=<ID=notMiRNA,Description=""Position not located in miRNA locus""> ##FORMAT=<ID=DP,Number=1,Type=Integer,Description=""Read Depth""> ##FORMAT=<ID=AD,Number=1,Type=Integer,Description=""Allelic Depth""> #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT {4}", DateTime.Now, "PileupCountBuilder", options.FisherPValue, options.MinimumAlternativeAlleleFrequency, Path.GetFileNameWithoutExtension(options.InputFile)); using (var sr = SAMFactory.GetReader(options.InputFile, true)) { int count = 0; string line; while ((line = sr.ReadLine()) != null) { count++; if (count % 100 == 0) { if (Progress.IsCancellationPending()) { 
throw new UserTerminatedException(); } } if (count % 100000 == 0) { Progress.SetMessage("{0} reads processed", count); } var parts = line.Split('\t'); var qname = parts[SAMFormatConst.QNAME_INDEX]; var seq = parts[SAMFormatConst.SEQ_INDEX]; //too short if (seq.Length < options.MinimumReadLength) { continue; } SAMFlags flag = (SAMFlags)int.Parse(parts[SAMFormatConst.FLAG_INDEX]); //unmatched if (flag.HasFlag(SAMFlags.UnmappedQuery)) { continue; } var cigar = parts[SAMFormatConst.CIGAR_INDEX]; //insertion/deletion if (cigar.Any(m => m == 'I' || m == 'D')) { continue; } var sam = new SAMAlignedItem() { Qname = qname, }; bool isReversed = flag.HasFlag(SAMFlags.QueryOnReverseStrand); char strand; if (isReversed) { strand = '-'; sam.Sequence = SequenceUtils.GetReverseComplementedSequence(seq); } else { strand = '+'; sam.Sequence = seq; } var loc = new SAMAlignedLocation(sam) { Seqname = parts[SAMFormatConst.RNAME_INDEX].StringAfter("chr"), Start = int.Parse(parts[SAMFormatConst.POS_INDEX]), Strand = strand, Cigar = parts[SAMFormatConst.CIGAR_INDEX], MismatchPositions = format.GetMismatchPositions(parts), NumberOfMismatch = format.GetNumberOfMismatch(parts), Sequence = seq }; loc.ParseEnd(sam.Sequence); sam.AddLocation(loc); if (format.HasAlternativeHits) { format.ParseAlternativeHits(parts, sam); } var finished = pc.Add(sam, cm.GetCount(sam.Qname)); if (null == finished || 0 == finished.Count) { continue; } foreach (var fin in finished) { //if (fin.Chromosome.Equals("1") && fin.Position == 5160725) //{ // Console.WriteLine(fin); //} var ft = fin.FisherExactTest(); if (ft.PValue <= options.FisherPValue) { var total = fin.Sum(m => m.Value); var minallele = total * options.MinimumAlternativeAlleleFrequency; if (ft.Sample2.Failed >= minallele) { List <GtfItem> srs; List <string> ranges = new List <string>(); if (srmap.TryGetValue(sam.Locations[0].Seqname, out srs)) { foreach (var seqr in srs) { if (seqr.Contains(fin.Position)) { ranges.Add(seqr.GetNameLocation()); } } } var 
alter = (from r in fin where r.Key != fin.Reference orderby r.Key select r).ToList(); var str = string.Format("{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}\tNS={7};DP={8};AF={9};FP={10:0.##E0}{11}\tDP:AD\t{12}:{13},{14}", fin.Chromosome, fin.Position, ".", fin.Reference, (from r in alter select r.Key.ToString()).Merge(","), 0, ranges.Count == 0 ? "notMiRNA" : "PASS", 1, total, (from r in alter select string.Format("{0:0.###}", r.Value * 1.0 / total)).Merge(","), ft.PValue, ranges.Count == 0 ? "" : ";" + ranges.Merge(","), total, ft.Sample2.Succeed, (from r in alter select r.Value.ToString()).Merge(",")); sw.WriteLine(str); //Console.WriteLine(str); if (swScript != null && ranges.Count > 0) { swScript.WriteLine(@"goto {0}:{1} sort position snapshot {0}_{2}_{1}.png", fin.Chromosome, fin.Position, ranges[0].Replace('(', '_').Replace(')', '_').Replace(':', '_')); } } } } finished.Clear(); } } } } finally { if (swScript != null) { swScript.Close(); } } return(new string[] { options.OutputFile }); }
/// <summary>
/// Builds aligned items from a SAM file. Every line yields a
/// <see cref="QueryInfo"/> in <paramref name="totalQueries"/>; only mapped
/// reads that pass the mismatch, query name and read length filters are
/// turned into items.
/// </summary>
/// <typeparam name="T">Aligned item type to create for each accepted read.</typeparam>
/// <param name="fileName">SAM file to read.</param>
/// <param name="totalQueries">Receives one entry per line read.</param>
/// <returns>The accepted reads, each with its primary location and any alternative hits.</returns>
/// <exception cref="UserTerminatedException">Cancellation was requested through <c>Progress</c>.</exception>
protected override List<T> DoBuild<T>(string fileName, out List<QueryInfo> totalQueries)
{
    var accepted = new List<T>();
    _format = _options.GetSAMFormat();
    totalQueries = new List<QueryInfo>();

    using (var reader = SAMFactory.GetReader(fileName, true))
    {
        int readCount = 0;
        int acceptedCount = 0;

        string line;
        while ((line = reader.ReadLine()) != null)
        {
            if (readCount % 1000 == 0 && Progress.IsCancellationPending())
            {
                throw new UserTerminatedException();
            }

            if (readCount > 0 && readCount % 100000 == 0)
            {
                Progress.SetMessage("{0} candidates from {1} reads", acceptedCount, readCount);
            }

            readCount++;

            var qname = line.StringBefore("\t");
            var info = new QueryInfo(qname);
            totalQueries.Add(info);

            var fields = line.Split('\t');
            SAMFlags flag = (SAMFlags)int.Parse(fields[SAMFormatConst.FLAG_INDEX]);

            // Skip unmapped reads.
            if (flag.HasFlag(SAMFlags.UnmappedQuery))
            {
                continue;
            }

            var mismatchCount = _format.GetNumberOfMismatch(fields);
            var seq = fields[SAMFormatConst.SEQ_INDEX];

            info.Mismatch = mismatchCount;
            info.Length = seq.Length;
            info.NoPenaltyMutation = 0;

            if (_options.T2cAsNoPenaltyMutation)
            {
                // Intentionally empty: no T2C specific handling is performed here.
            }

            // Skip reads with too many mismatches.
            if (mismatchCount > _options.MaximumMismatch)
            {
                continue;
            }

            if (!AcceptQueryName(qname))
            {
                continue;
            }

            // Skip reads that are too short or too long.
            if (seq.Length < _options.MinimumReadLength || seq.Length > _options.MaximumReadLength)
            {
                continue;
            }

            var cigar = fields[SAMFormatConst.CIGAR_INDEX];

            // Reads on the reverse strand are stored as their reverse complement.
            bool reversed = flag.HasFlag(SAMFlags.QueryOnReverseStrand);
            char strand = reversed ? '-' : '+';
            if (reversed)
            {
                seq = SequenceUtils.GetReverseComplementedSequence(seq);
            }

            var score = _format.GetAlignmentScore(fields);
            var sam = new T() { Qname = qname, Sequence = seq };

            var location = new SAMAlignedLocation(sam)
            {
                Seqname = fields[SAMFormatConst.RNAME_INDEX],
                Start = int.Parse(fields[SAMFormatConst.POS_INDEX]),
                Strand = strand,
                Cigar = cigar,
                NumberOfMismatch = mismatchCount,
                AlignmentScore = score,
                MismatchPositions = _format.GetMismatchPositions(fields)
            };
            location.ParseEnd(sam.Sequence);
            sam.AddLocation(location);

            if (_format.HasAlternativeHits)
            {
                _format.ParseAlternativeHits(fields, sam);
            }

            accepted.Add(sam);
            acceptedCount++;
        }

        Progress.SetMessage("Finally, there are {0} candidates from {1} reads", acceptedCount, readCount);
    }

    return accepted;
}
/// <summary>
/// Placeholder that is not implemented yet; every call throws.
/// </summary>
/// <param name="list">Aligned locations (currently unused).</param>
/// <param name="list_2">Mapped miRNA regions (currently unused).</param>
/// <param name="loc">Never assigned because the method always throws.</param>
/// <param name="reg">Never assigned because the method always throws.</param>
/// <exception cref="NotImplementedException">Always.</exception>
private void FindLocation(
    List<SAMAlignedLocation> list,
    List<MappedMirnaRegion> list_2,
    out SAMAlignedLocation loc,
    out MappedMirnaRegion reg)
{
    throw new NotImplementedException();
}
/// <summary>
/// Default locus filter: accepts every aligned location. Derived classes can
/// override this to restrict the accepted loci.
/// </summary>
/// <param name="loc">Aligned location to test (not inspected here).</param>
/// <returns>Always <c>true</c>.</returns>
public virtual bool AcceptLocus(SAMAlignedLocation loc)
{
    return true;
}
/// <summary>
/// Builds aligned items from a SAM file, delegating every acceptance decision
/// (flags, mismatch count, query name, read length, CIGAR and locus) to
/// <c>_filter</c>. Every line yields a <see cref="QueryInfo"/> in
/// <paramref name="totalQueries"/>, accepted or not.
/// </summary>
/// <typeparam name="T">Aligned item type to create for each accepted read.</typeparam>
/// <param name="fileName">SAM file to read.</param>
/// <param name="totalQueries">Receives one entry per line read.</param>
/// <returns>The reads accepted by all filter stages.</returns>
/// <exception cref="UserTerminatedException">Cancellation was requested through <c>Progress</c>.</exception>
protected override List<T> DoBuild<T>(string fileName, out List<QueryInfo> totalQueries)
{
    var accepted = new List<T>();
    _format = _options.GetSAMFormat();
    totalQueries = new List<QueryInfo>();

    using (var reader = SAMFactory.GetReader(fileName, true))
    {
        int readCount = 0;
        int acceptedCount = 0;

        string line;
        while ((line = reader.ReadLine()) != null)
        {
            readCount++;

            if (readCount % 1000 == 0 && Progress.IsCancellationPending())
            {
                throw new UserTerminatedException();
            }

            var fields = line.Split('\t');
            var qname = fields[SAMFormatConst.QNAME_INDEX];

            var info = new QueryInfo(qname);
            totalQueries.Add(info);

            SAMFlags flag = (SAMFlags)int.Parse(fields[SAMFormatConst.FLAG_INDEX]);
            if (!_filter.AcceptFlags(flag))
            {
                continue;
            }

            var mismatchCount = _format.GetNumberOfMismatch(fields);
            var seq = fields[SAMFormatConst.SEQ_INDEX];
            info.Mismatch = mismatchCount;
            info.Length = seq.Length;

            // Filters are applied in this order: mismatch count, query name, read length.
            bool passesReadFilters = _filter.AcceptMismatch(mismatchCount)
                && _filter.AcceptQueryName(qname)
                && _filter.AcceptLength(seq.Length);
            if (!passesReadFilters)
            {
                continue;
            }

            var cigar = fields[SAMFormatConst.CIGAR_INDEX];
            if (!_filter.AcceptCigar(cigar))
            {
                continue;
            }

            var seqname = fields[SAMFormatConst.RNAME_INDEX].StringAfter("chr");
            var start = int.Parse(fields[SAMFormatConst.POS_INDEX]);
            var end = SAMUtils.ParseEnd(start, cigar);

            bool reversed = flag.HasFlag(SAMFlags.QueryOnReverseStrand);
            char strand = reversed ? '-' : '+';

            // The locus filter only gets the coordinates; the remaining fields
            // are filled in after the location has been accepted.
            var sam = new T();
            var location = new SAMAlignedLocation(sam)
            {
                Seqname = seqname,
                Start = start,
                End = end,
                Strand = strand,
            };

            if (!_filter.AcceptLocus(location))
            {
                continue;
            }

            if (reversed)
            {
                seq = SequenceUtils.GetReverseComplementedSequence(seq);
            }

            sam.Qname = qname;
            sam.Sequence = seq;

            location.AlignmentScore = _format.GetAlignmentScore(fields);
            location.Cigar = cigar;
            location.NumberOfMismatch = mismatchCount;
            location.MismatchPositions = _format.GetMismatchPositions(fields);

            if (_format.HasAlternativeHits)
            {
                _format.ParseAlternativeHits(fields, sam);
            }

            accepted.Add(sam);
            acceptedCount++;

            if (acceptedCount % 100 == 0)
            {
                Progress.SetMessage("{0} feature reads from {1} reads", acceptedCount, readCount);
            }
        }
    }

    return accepted;
}