/// <summary>
/// Converts the "query" elements found under the "queries" child of
/// <paramref name="root"/> into <see cref="SAMAlignedItem"/> objects, one per query,
/// each with a <see cref="SAMAlignedLocation"/> for every nested "location" element.
/// </summary>
/// <param name="root">Element that contains a "queries" child element.</param>
/// <returns>The queries in document order.</returns>
public static List<SAMAlignedItem> ToSAMAlignedItems(this XElement root)
{
    var items = new List<SAMAlignedItem>();

    foreach (var queryNode in root.Element("queries").Elements("query"))
    {
        var item = new SAMAlignedItem()
        {
            Qname = queryNode.Attribute("name").Value,
            Sequence = queryNode.Attribute("sequence").Value,
            QueryCount = int.Parse(queryNode.Attribute("count").Value),
            // "sample" is optional; null is used when it is absent.
            Sample = queryNode.GetAttributeValue("sample", null)
        };
        items.Add(item);

        foreach (var locationNode in queryNode.Elements("location"))
        {
            // The owning query is handed to the location through its constructor.
            var location = new SAMAlignedLocation(item);
            location.ParseLocation(locationNode);
            location.Cigar = locationNode.Attribute("cigar").Value;
            location.AlignmentScore = int.Parse(locationNode.Attribute("score").Value);
            location.MismatchPositions = locationNode.Attribute("mdz").Value;
            location.NumberOfMismatch = int.Parse(locationNode.Attribute("nmi").Value);

            // "nnpm" is optional and only applied when the attribute exists.
            var noPenaltyAttribute = locationNode.Attribute("nnpm");
            if (noPenaltyAttribute != null)
            {
                location.NumberOfNoPenaltyMutation = int.Parse(noPenaltyAttribute.Value);
            }
        }
    }

    return items;
}
/// <summary>
/// Checks the position, reference allele and sample allele of the three mismatches
/// returned by GetGsnapMismatches for a forward-strand location.
/// </summary>
public void TestGetGsnapMismatches()
{
    var readSequence = "GTTTCTGTAGTGTAGTGGTTATCACGTTCGCCT";
    var owner = new SAMAlignedItem() { Sequence = readSequence };
    var location = new SAMAlignedLocation(owner)
    {
        Strand = '+',
        NumberOfMismatch = 1,
        NumberOfNoPenaltyMutation = 2,
        Cigar = "GTTTCc.TAGTGTAGTGGTTATCAC.TTCGCCT",
        MismatchPositions = "5CA18A7"
    };

    var mismatches = location.GetGsnapMismatches();

    // First mismatch.
    var first = mismatches[0];
    Assert.AreEqual(5, first.Position);
    Assert.AreEqual('C', first.RefAllele);
    Assert.AreEqual('T', first.SampleAllele);

    // Second mismatch.
    var second = mismatches[1];
    Assert.AreEqual(6, second.Position);
    Assert.AreEqual('A', second.RefAllele);
    Assert.AreEqual('G', second.SampleAllele);

    // Third mismatch.
    var third = mismatches[2];
    Assert.AreEqual(25, third.Position);
    Assert.AreEqual('A', third.RefAllele);
    Assert.AreEqual('G', third.SampleAllele);
}
/// <summary>
/// Accepts a location when it overlaps at least one feature registered for its
/// sequence name, and records the location on every overlapping feature.
/// </summary>
/// <param name="loc">Aligned location to test; its Seqname selects the feature list.</param>
/// <returns>
/// <c>true</c> if at least one feature overlapped <paramref name="loc"/> with
/// <c>minOverlapPercentage</c>; <c>false</c> if none did or the sequence name has no features.
/// </returns>
public override bool AcceptLocus(SAMAlignedLocation loc)
{
    // The feature list of the most recently seen sequence name is cached so that
    // consecutive locations on the same sequence skip the dictionary lookup.
    if (!loc.Seqname.Equals(lastSeqname))
    {
        if (!featureMap.TryGetValue(loc.Seqname, out lastFeatures))
        {
            // A failed TryGetValue overwrites lastFeatures with its default value, so the
            // cached name no longer matches the cached list. Forget the name as well;
            // otherwise the next location on the previously cached sequence would skip
            // the lookup and enumerate a null list.
            lastSeqname = null;
            return false;
        }

        lastSeqname = loc.Seqname;
    }

    var result = false;
    foreach (var feature in lastFeatures)
    {
        // Feature lies entirely before the location: keep scanning.
        if (feature.End < loc.Start)
        {
            continue;
        }

        // Feature starts after the location ends: stop scanning.
        // NOTE(review): the early exit presumably relies on the list being ordered by Start - confirm.
        if (feature.Start > loc.End)
        {
            break;
        }

        if (feature.Overlap(loc, this.minOverlapPercentage))
        {
            result = true;

            var samloc = new FeatureSamLocation(feature);
            samloc.SamLocation = loc;
            feature.SamLocations.Add(samloc);
        }
    }

    return result;
}
/// <summary>
/// Calls CheckNoPenaltyMutation with default-constructed arguments.
/// The result is not asserted; the test only requires the call to complete.
/// </summary>
public void Test()
{
    var processorOptions = new SmallRNACountProcessorOptions();
    var processor = new SmallRNACountProcessor(processorOptions);

    var featureLocation = new FeatureLocation();
    var alignedLocation = new SAMAlignedLocation(null);

    var actual = processor.CheckNoPenaltyMutation(featureLocation, alignedLocation, 1);
}
/// <summary>
/// Removes <paramref name="loc"/> from this item's location list. If the location
/// still names this item as its parent, the parent reference is cleared first.
/// Does nothing when the location is not in the list.
/// </summary>
/// <param name="loc">Location to detach.</param>
public void RemoveLocation(SAMAlignedLocation loc)
{
    if (!this._locations.Contains(loc))
    {
        return;
    }

    if (this == loc.Parent)
    {
        loc.Parent = null;
    }

    this._locations.Remove(loc);
}
/// <summary>
/// Detaches a location from this item: clears the location's parent when it points
/// at this item, then drops it from the internal list. Locations that are not in
/// the list are left untouched.
/// </summary>
/// <param name="loc">Location to remove.</param>
public void RemoveLocation(SAMAlignedLocation loc)
{
    var isListed = this._locations.Contains(loc);
    if (isListed)
    {
        var parentIsThis = this == loc.Parent;
        if (parentIsThis)
        {
            loc.Parent = null;
        }

        this._locations.Remove(loc);
    }
}
/// <summary>
/// Makes this item the parent of <paramref name="loc"/>. If the location belongs to
/// a different item, it is removed from that item first. The location is appended
/// to this item's list only when it is not already there.
/// </summary>
/// <param name="loc">Location to attach.</param>
public void AddLocation(SAMAlignedLocation loc)
{
    var previousOwner = loc.Parent;
    if (previousOwner != this && previousOwner != null)
    {
        previousOwner.RemoveLocation(loc);
    }

    loc.Parent = this;

    if (this._locations.Contains(loc))
    {
        return;
    }

    this._locations.Add(loc);
}
/// <summary>
/// Attaches a location to this item. A location owned by another item is first
/// removed from that owner; the parent is then set to this item and the location
/// is added to the list unless it is already present.
/// </summary>
/// <param name="loc">Location to take ownership of.</param>
public void AddLocation(SAMAlignedLocation loc)
{
    if (loc.Parent != this)
    {
        if (loc.Parent != null)
        {
            // Owned by a different item: let that item release it.
            loc.Parent.RemoveLocation(loc);
        }
    }

    loc.Parent = this;

    var alreadyListed = this._locations.Contains(loc);
    if (!alreadyListed)
    {
        this._locations.Add(loc);
    }
}
/// <summary>
/// Reads every "query" element below the next "queries" element of
/// <paramref name="source"/> and builds one <see cref="SAMAlignedItem"/> per query,
/// with a <see cref="SAMAlignedLocation"/> for each nested "location" element.
/// </summary>
/// <param name="source">Reader positioned before the "queries" element.</param>
/// <returns>The queries in the order they were read.</returns>
public static List<SAMAlignedItem> ReadFrom(XmlReader source)
{
    var items = new List<SAMAlignedItem>();

    source.ReadToFollowing("queries");

    for (var onQuery = source.ReadToDescendant("query"); onQuery; onQuery = source.ReadToNextSibling("query"))
    {
        var item = new SAMAlignedItem();
        items.Add(item);

        item.Qname = source.GetAttribute("name");
        item.Sequence = source.GetAttribute("sequence");
        item.QueryCount = int.Parse(source.GetAttribute("count"));
        item.Sample = source.GetAttribute("sample");

        for (var onLocation = source.ReadToDescendant("location"); onLocation; onLocation = source.ReadToNextSibling("location"))
        {
            // The owning query is handed to the location through its constructor.
            var location = new SAMAlignedLocation(item);
            location.Seqname = source.GetAttribute("seqname");
            location.Start = long.Parse(source.GetAttribute("start"));
            location.End = long.Parse(source.GetAttribute("end"));
            location.Strand = source.GetAttribute("strand")[0];
            location.Cigar = source.GetAttribute("cigar");
            location.AlignmentScore = int.Parse(source.GetAttribute("score"));
            location.MismatchPositions = source.GetAttribute("mdz");
            location.NumberOfMismatch = int.Parse(source.GetAttribute("nmi"));

            // "nnpm" is optional; GetAttribute yields null when it is missing.
            var noPenaltyText = source.GetAttribute("nnpm");
            if (noPenaltyText != null)
            {
                location.NumberOfNoPenaltyMutation = int.Parse(noPenaltyText);
            }
        }
    }

    return items;
}
/// <summary>
/// Advances <paramref name="source"/> to the next "queries" element and turns each
/// of its "query" children into a <see cref="SAMAlignedItem"/>, creating a
/// <see cref="SAMAlignedLocation"/> for every "location" child of the query.
/// </summary>
/// <param name="source">XML reader to consume.</param>
/// <returns>All queries that were read, in document order.</returns>
public static List<SAMAlignedItem> ReadFrom(XmlReader source)
{
    var queries = new List<SAMAlignedItem>();
    source.ReadToFollowing("queries");

    var moreQueries = source.ReadToDescendant("query");
    while (moreQueries)
    {
        var query = new SAMAlignedItem();
        queries.Add(query);

        query.Qname = source.GetAttribute("name");
        query.Sequence = source.GetAttribute("sequence");
        query.QueryCount = int.Parse(source.GetAttribute("count"));
        query.Sample = source.GetAttribute("sample");

        var moreLocations = source.ReadToDescendant("location");
        while (moreLocations)
        {
            var hit = new SAMAlignedLocation(query)
            {
                Seqname = source.GetAttribute("seqname"),
                Start = long.Parse(source.GetAttribute("start")),
                End = long.Parse(source.GetAttribute("end")),
                Strand = source.GetAttribute("strand")[0],
                Cigar = source.GetAttribute("cigar"),
                AlignmentScore = int.Parse(source.GetAttribute("score")),
                MismatchPositions = source.GetAttribute("mdz"),
                NumberOfMismatch = int.Parse(source.GetAttribute("nmi"))
            };

            // The no-penalty mutation count is only set when the attribute is present.
            var nnpm = source.GetAttribute("nnpm");
            if (nnpm != null)
            {
                hit.NumberOfNoPenaltyMutation = int.Parse(nnpm);
            }

            moreLocations = source.ReadToNextSibling("location");
        }

        moreQueries = source.ReadToNextSibling("query");
    }

    return queries;
}
/// <summary>
/// Adds the alternative hits listed in the "XA:Z:" option of a SAM record to
/// <paramref name="item"/> as additional locations.
/// </summary>
/// <param name="parts">Tab-separated fields of one SAM record.</param>
/// <param name="item">Query that already holds its primary location at index 0.</param>
/// <remarks>
/// At most ("X0:i:" value - 1) entries are consumed. Nothing is added when either
/// option is missing or empty, or when that count is not positive. Parsing stops
/// early if the XA value holds fewer matching entries than that count.
/// </remarks>
public override void ParseAlternativeHits(string[] parts, SAMAlignedItem item)
{
    var countstr = GetOptionValue(parts, "X0:i:", false);
    if (string.IsNullOrEmpty(countstr))
    {
        return;
    }

    // The X0 value includes the primary hit, so one is subtracted.
    var count = int.Parse(countstr) - 1;
    if (count <= 0)
    {
        return;
    }

    var xaz = GetOptionValue(parts, "XA:Z:", false);
    if (string.IsNullOrEmpty(xaz))
    {
        return;
    }

    // Walk the successive matches. Reusing the first match for every iteration
    // would add the same alternative hit 'count' times.
    // NOTE(review): assumes _reg matches one XA entry per match - confirm against its pattern.
    var match = _reg.Match(xaz);
    for (var i = 0; i < count && match.Success; i++)
    {
        var loc = new SAMAlignedLocation(item)
        {
            Seqname = match.Groups[1].Value,
            Strand = match.Groups[2].Value[0],
            Start = long.Parse(match.Groups[3].Value)
        };

        // The end is derived from the length of the primary location.
        loc.End = loc.Start + item.Locations[0].Length - 1;
        loc.Cigar = match.Groups[4].Value;
        loc.NumberOfMismatch = int.Parse(match.Groups[5].Value);
        item.AddLocation(loc);

        match = match.NextMatch();
    }
}
/// <summary>
/// Exercises GetNotGsnapMismatch for a location without mismatches, a forward-strand
/// location with one mismatch, and a reverse-strand location with one mismatch.
/// </summary>
public void TestMethod()
{
    var readSequence = "TCCTGTACTGAGCTGCCCCGAGA";

    // No mismatch: nothing is returned.
    var perfectHit = new SAMAlignedLocation(null)
    {
        Strand = '+',
        NumberOfMismatch = 0,
        MismatchPositions = "23"
    };
    Assert.IsNull(perfectHit.GetNotGsnapMismatch(readSequence));

    // Forward strand, single mismatch.
    var forwardHit = new SAMAlignedLocation(null)
    {
        Strand = '+',
        NumberOfMismatch = 1,
        MismatchPositions = "22C0"
    };
    var forwardMismatch = forwardHit.GetNotGsnapMismatch(readSequence);
    Assert.IsNotNull(forwardMismatch);
    Assert.AreEqual('C', forwardMismatch.RefAllele);
    Assert.AreEqual('A', forwardMismatch.SampleAllele);

    // Reverse strand, single mismatch.
    var reverseHit = new SAMAlignedLocation(null)
    {
        Strand = '-',
        NumberOfMismatch = 1,
        MismatchPositions = "0C22"
    };
    var reverseMismatch = reverseHit.GetNotGsnapMismatch(readSequence);
    Assert.IsNotNull(reverseMismatch);
    Assert.AreEqual('G', reverseMismatch.RefAllele);
    Assert.AreEqual('A', reverseMismatch.SampleAllele);
}
/// <summary>
/// Reads alignments from <c>options.InputFile</c>, adds each accepted read to a
/// <c>PileupCountList</c>, and writes a VCF-style record to <c>options.OutputFile</c>
/// for every finished position whose Fisher exact test p-value is at most
/// <c>options.FisherPValue</c> and whose <c>ft.Sample2.Failed</c> count is at least
/// total depth times <c>options.MinimumAlternativeAlleleFrequency</c>.
/// </summary>
/// <remarks>
/// A read is skipped when its sequence is shorter than <c>options.MinimumReadLength</c>,
/// when it carries the unmapped flag, or when its CIGAR contains 'I' or 'D'.
/// Sequence names have any "chr" prefix stripped via <c>StringAfter("chr")</c>, both for the
/// coordinate entries and for the reads. Records whose position falls in no coordinate
/// entry get the "notMiRNA" filter, all others "PASS".
/// When <c>options.ExportIgvScript</c> is set, a script is written to OutputFile + ".igv"
/// with goto/sort/snapshot commands for each written position that has at least one range.
/// Cancellation is polled every 100 reads; a progress message is set every 100000 reads.
/// NOTE(review): the range lookup keys on <c>sam.Locations[0].Seqname</c> (the read just added)
/// rather than <c>fin.Chromosome</c> - confirm finished positions always share that sequence name.
/// </remarks>
/// <returns>A one-element array holding <c>options.OutputFile</c>.</returns>
/// <exception cref="UserTerminatedException">Thrown when <c>Progress.IsCancellationPending()</c> reports true.</exception>
public override IEnumerable<string> Process() { PileupCountList pc = new PileupCountList(); var format = options.GetSAMFormat(); var cm = new SmallRNACountMap(options.CountFile); var srItems = SequenceRegionUtils.GetSequenceRegions(options.CoordinateFile, "miRNA", options.BedAsGtf); srItems.ForEach(m => { m.Seqname = m.Seqname.StringAfter("chr"); }); var srmap = srItems.GroupBy(m => m.Seqname).ToDictionary(m => m.Key, m => m.ToList()); StreamWriter swScript = null; try { if (options.ExportIgvScript) { swScript = new StreamWriter(options.OutputFile + ".igv"); swScript.WriteLine("snapshotDirectory {0}", Path.GetDirectoryName(options.OutputFile).Replace('\\', '/')); } using (StreamWriter sw = new StreamWriter(options.OutputFile)) { sw.WriteLine(@"##fileformat=VCFv4.2 ##fileDate={0:yyyyMMdd} ##source={1} ##phasing=partial ##INFO=<ID=NS,Number=1,Type=Integer,Description=""Number of Samples With Data""> ##INFO=<ID=DP,Number=1,Type=Integer,Description=""Total Depth""> ##INFO=<ID=AF,Number=A,Type=Float,Description=""Allele Frequency""> ##INFO=<ID=FP,Number=1,Type=Float,Description=""Fisher Exact Test P-Value""> ##INFO=<ID=MN,Number=.,Type=String,Description=""miRNA name contains this position""> ##FILTER=<ID=FisherET,Description=""Fisher exact test Pvalue less than {2}""> ##FILTER=<ID=AltAlleFreq,Description=""Alternative allele frequency less than {3}""> ##FILTER=<ID=notMiRNA,Description=""Position not located in miRNA locus""> ##FORMAT=<ID=DP,Number=1,Type=Integer,Description=""Read Depth""> ##FORMAT=<ID=AD,Number=1,Type=Integer,Description=""Allelic Depth""> #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT {4}", DateTime.Now, "PileupCountBuilder", options.FisherPValue, options.MinimumAlternativeAlleleFrequency, Path.GetFileNameWithoutExtension(options.InputFile)); using (var sr = SAMFactory.GetReader(options.InputFile, true)) { int count = 0; string line; while ((line = sr.ReadLine()) != null) { count++; if (count % 100 == 0) { if (Progress.IsCancellationPending()) { throw 
new UserTerminatedException(); } } if (count % 100000 == 0) { Progress.SetMessage("{0} reads processed", count); } var parts = line.Split('\t'); var qname = parts[SAMFormatConst.QNAME_INDEX]; var seq = parts[SAMFormatConst.SEQ_INDEX]; //too short if (seq.Length < options.MinimumReadLength) { continue; } SAMFlags flag = (SAMFlags)int.Parse(parts[SAMFormatConst.FLAG_INDEX]); //unmatched if (flag.HasFlag(SAMFlags.UnmappedQuery)) { continue; } var cigar = parts[SAMFormatConst.CIGAR_INDEX]; //insertion/deletion if (cigar.Any(m => m == 'I' || m == 'D')) { continue; } var sam = new SAMAlignedItem() { Qname = qname, }; bool isReversed = flag.HasFlag(SAMFlags.QueryOnReverseStrand); char strand; if (isReversed) { strand = '-'; sam.Sequence = SequenceUtils.GetReverseComplementedSequence(seq); } else { strand = '+'; sam.Sequence = seq; } var loc = new SAMAlignedLocation(sam) { Seqname = parts[SAMFormatConst.RNAME_INDEX].StringAfter("chr"), Start = int.Parse(parts[SAMFormatConst.POS_INDEX]), Strand = strand, Cigar = parts[SAMFormatConst.CIGAR_INDEX], MismatchPositions = format.GetMismatchPositions(parts), NumberOfMismatch = format.GetNumberOfMismatch(parts), Sequence = seq }; loc.ParseEnd(sam.Sequence); sam.AddLocation(loc); if (format.HasAlternativeHits) { format.ParseAlternativeHits(parts, sam); } var finished = pc.Add(sam, cm.GetCount(sam.Qname)); if (null == finished || 0 == finished.Count) { continue; } foreach (var fin in finished) { //if (fin.Chromosome.Equals("1") && fin.Position == 5160725) //{ // Console.WriteLine(fin); //} var ft = fin.FisherExactTest(); if (ft.PValue <= options.FisherPValue) { var total = fin.Sum(m => m.Value); var minallele = total * options.MinimumAlternativeAlleleFrequency; if (ft.Sample2.Failed >= minallele) { List<GtfItem> srs; List<string> ranges = new List<string>(); if (srmap.TryGetValue(sam.Locations[0].Seqname, out srs)) { foreach (var seqr in srs) { if (seqr.Contains(fin.Position)) { ranges.Add(seqr.GetNameLocation()); } } } var alter = 
(from r in fin where r.Key != fin.Reference orderby r.Key select r).ToList(); var str = string.Format("{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}\tNS={7};DP={8};AF={9};FP={10:0.##E0}{11}\tDP:AD\t{12}:{13},{14}", fin.Chromosome, fin.Position, ".", fin.Reference, (from r in alter select r.Key.ToString()).Merge(","), 0, ranges.Count == 0 ? "notMiRNA" : "PASS", 1, total, (from r in alter select string.Format("{0:0.###}", r.Value * 1.0 / total)).Merge(","), ft.PValue, ranges.Count == 0 ? "" : ";" + ranges.Merge(","), total, ft.Sample2.Succeed, (from r in alter select r.Value.ToString()).Merge(",")); sw.WriteLine(str); //Console.WriteLine(str); if (swScript != null && ranges.Count > 0) { swScript.WriteLine(@"goto {0}:{1} sort position snapshot {0}_{2}_{1}.png", fin.Chromosome, fin.Position, ranges[0].Replace('(', '_').Replace(')', '_').Replace(':', '_')); } } } } finished.Clear(); } } } } finally { if (swScript != null) { swScript.Close(); } } return new string[] { options.OutputFile }; }
/// <summary>
/// Decides whether an aligned location should be kept. This implementation
/// accepts every location; being virtual, it can be overridden.
/// </summary>
/// <param name="loc">Location under consideration (not inspected here).</param>
/// <returns>Always <c>true</c>.</returns>
public virtual bool AcceptLocus(SAMAlignedLocation loc)
{
    return true;
}
/// <summary>
/// Reads one alignment block from the uncompressed data and decodes it into a
/// <see cref="SAMAlignedItem"/> together with one <see cref="SAMAlignedLocation"/>
/// constructed for that item.
/// </summary>
/// <remarks>
/// Steps, in order: read a 4-byte block length and then the block itself; return null when
/// <c>Filter.Accept</c> rejects the block; decode the reference index (-1 becomes "*"), the
/// start (stored value plus 1), MapQ, flag, CIGAR (empty becomes "*"), the query name, the
/// 4-bit packed sequence and the qualities (each byte plus 33, or an asterisk when the first
/// quality byte is 0xFF). For reads without the unmapped flag, the trailing optional fields
/// are rendered as "tag:type:value" strings (integer types c, C, s, S and I are reported as
/// 'i') and handed to <c>Format</c> to obtain the alignment score, number of mismatches and
/// mismatch positions.
/// NOTE(review): when readLen is 0 the sequence loop body never runs, yet the statements
/// after it still decode the byte at <c>index</c> - confirm such records cannot reach here.
/// NOTE(review): <c>loc</c> is never added to <c>result</c> explicitly in this method;
/// presumably the SAMAlignedLocation constructor registers it with its parent - verify.
/// </remarks>
/// <returns>The decoded item, or null when the filter rejects the alignment block.</returns>
protected override SAMAlignedItem GetAlignedSequence() { var array = new byte[4]; ReadUnCompressedData(array, 0, 4); int blockLen = Helper.GetInt32(array, 0); var alignmentBlock = new byte[blockLen]; ReadUnCompressedData(alignmentBlock, 0, blockLen); if (!Filter.Accept(alignmentBlock)) { return(null); } var result = new SAMAlignedItem(); var loc = new SAMAlignedLocation(result); int value; // 0-4 bytes var refSeqIndex = Helper.GetInt32(alignmentBlock, 0); loc.Seqname = refSeqIndex == -1 ? "*" : RefSeqNames[refSeqIndex]; // 4-8 bytes loc.Start = Helper.GetInt32(alignmentBlock, 4) + 1; // 8 - 12 bytes "bin<<16|mapQual<<8|read_name_len" var unsignedValue = Helper.GetUInt32(alignmentBlock, 8); // 10 -12 bytes //alignedSeq.Bin = (int)(UnsignedValue & 0xFFFF0000) >> 16; // 9th bytes loc.MapQ = (int)(unsignedValue & 0x0000FF00) >> 8; // 8th bytes var queryNameLen = (int)(unsignedValue & 0x000000FF); // 12 - 16 bytes unsignedValue = Helper.GetUInt32(alignmentBlock, 12); // 14-16 bytes var flagValue = (int)(unsignedValue & 0xFFFF0000) >> 16; loc.Flag = (SAMFlags)flagValue; // 12-14 bytes var cigarLen = (int)(unsignedValue & 0x0000FFFF); // 16-20 bytes var readLen = Helper.GetInt32(alignmentBlock, 16); // 32-(32+readLen) bytes result.Qname = Encoding.ASCII.GetString(alignmentBlock, 32, queryNameLen - 1); var strbuilder = new StringBuilder(); var startIndex = 32 + queryNameLen; for (var i = startIndex; i < (startIndex + cigarLen * 4); i += 4) { // Get the CIGAR operation length stored in first 28 bits. var cigarValue = Helper.GetUInt32(alignmentBlock, i); strbuilder.Append(((cigarValue & 0xFFFFFFF0) >> 4).ToString(CultureInfo.InvariantCulture)); // Get the CIGAR operation stored in last 4 bits. value = (int)cigarValue & 0x0000000F; // MIDNSHP=>0123456 strbuilder.Append(GetCigarChar(value, result.Qname)); } var cigar = strbuilder.ToString(); loc.Cigar = string.IsNullOrWhiteSpace(cigar) ? 
"*" : cigar; startIndex += cigarLen * 4; var sequence = new StringBuilder(); var index = startIndex; for (; index < (startIndex + (readLen + 1) / 2) - 1; index++) { // Get first 4 bit value value = (alignmentBlock[index] & 0xF0) >> 4; sequence.Append(GetSeqChar(value, result.Qname)); // Get last 4 bit value value = alignmentBlock[index] & 0x0F; sequence.Append(GetSeqChar(value, result.Qname)); } value = (alignmentBlock[index] & 0xF0) >> 4; sequence.Append(GetSeqChar(value, result.Qname)); if (readLen % 2 == 0) { value = alignmentBlock[index] & 0x0F; sequence.Append(GetSeqChar(value, result.Qname)); } startIndex = index + 1; var qualValues = new StringBuilder(); if (alignmentBlock[startIndex] != 0xFF) { for (var i = startIndex; i < (startIndex + readLen); i++) { qualValues.Append((char)(alignmentBlock[i] + 33)); } } else { qualValues.Append(SAMParser.AsteriskAsByte); } loc.Sequence = sequence.ToString(); loc.Qual = qualValues.ToString(); loc.Strand = loc.Flag.HasFlag(SAMFlags.QueryOnReverseStrand) ? '-' : '+'; if (!loc.Flag.HasFlag(SAMFlags.UnmappedQuery)) { startIndex += readLen; if (alignmentBlock.Length > startIndex + 4 && alignmentBlock[startIndex] != 0x0 && alignmentBlock[startIndex + 1] != 0x0) { var options = new List <string>(); for (int i = 0; i < SAMFormat.OptionStartIndex; i++) { options.Add(string.Empty); } for (index = startIndex; index < alignmentBlock.Length;) { var tag = Encoding.ASCII.GetString(alignmentBlock, index, 2); index += 2; var vType = (char)alignmentBlock[index++]; // SAM format supports [AifZH] for value type. // In BAM, an integer may be stored as a signed 8-bit integer (c), unsigned 8-bit integer (C), signed short (s), unsigned // short (S), signed 32-bit (i) or unsigned 32-bit integer (I), depending on the signed magnitude of the integer. However, // in SAM, all types of integers are presented as type ʻiʼ. 
//NOTE: Code previously here checked for valid value and threw an exception here, but this exception/validation is checked for in this method below, as while as when the value is set. var tValue = GetOptionalValue(vType, alignmentBlock, ref index); // Convert to SAM format, where all integers are represented the same way if ("cCsSI".IndexOf(vType) >= 0) { vType = 'i'; } options.Add(string.Format("{0}:{1}:{2}", tag, vType, tValue)); } var optionarrays = options.ToArray(); loc.AlignmentScore = Format.GetAlignmentScore(optionarrays); loc.NumberOfMismatch = Format.GetNumberOfMismatch(optionarrays); loc.MismatchPositions = Format.GetMismatchPositions(optionarrays); } } return(result); }
/// <summary>
/// Reads lines from the underlying file until one describes a mapped read whose
/// mapping quality is at least <c>_options.MinimumReadQuality</c>, and returns it
/// as a <see cref="SAMAlignedItem"/> with its primary location (plus alternative
/// hits when the format provides them).
/// </summary>
/// <returns>The next accepted read, or null when the file is exhausted.</returns>
public SAMAlignedItem NextSAMAlignedItem()
{
    for (var line = _file.ReadLine(); line != null; line = _file.ReadLine())
    {
        var fields = line.Split('\t');
        var readName = fields[SAMFormatConst.QNAME_INDEX];
        var readSequence = fields[SAMFormatConst.SEQ_INDEX];
        var flag = (SAMFlags)int.Parse(fields[SAMFormatConst.FLAG_INDEX]);

        // Unmapped reads are skipped.
        if (flag.HasFlag(SAMFlags.UnmappedQuery))
        {
            continue;
        }

        // Reads below the minimum mapping quality are skipped.
        var mappingQuality = int.Parse(fields[SAMFormatConst.MAPQ_INDEX]);
        if (mappingQuality < _options.MinimumReadQuality)
        {
            continue;
        }

        // The item stores the reverse complement for reverse-strand reads;
        // the location keeps the sequence exactly as it appears in the record.
        var onReverseStrand = flag.HasFlag(SAMFlags.QueryOnReverseStrand);
        var item = new SAMAlignedItem
        {
            Qname = readName,
            Sequence = onReverseStrand
                ? SequenceUtils.GetReverseComplementedSequence(readSequence)
                : readSequence
        };

        var location = new SAMAlignedLocation(item)
        {
            Seqname = fields[SAMFormatConst.RNAME_INDEX],
            Start = int.Parse(fields[SAMFormatConst.POS_INDEX]),
            Strand = onReverseStrand ? '-' : '+',
            Cigar = fields[SAMFormatConst.CIGAR_INDEX],
            MismatchPositions = _format.GetMismatchPositions(fields),
            NumberOfMismatch = _format.GetNumberOfMismatch(fields),
            Sequence = readSequence,
            Qual = fields[SAMFormatConst.QUAL_INDEX]
        };
        location.ParseEnd(item.Sequence);
        item.AddLocation(location);

        if (_format.HasAlternativeHits)
        {
            _format.ParseAlternativeHits(fields, item);
        }

        return item;
    }

    return null;
}
/// <summary>
/// Parses the alternative hits in the "XA:Z:" option of a SAM record and adds each
/// one to <paramref name="item"/> as an extra location.
/// </summary>
/// <param name="parts">Tab-separated fields of one SAM record.</param>
/// <param name="item">Query whose primary location is already at index 0.</param>
/// <remarks>
/// The number of entries consumed is limited to ("X0:i:" value - 1). The method
/// returns without adding anything when either option is absent or empty, or when
/// that limit is not positive. It stops early when the XA value contains fewer
/// matching entries than the limit.
/// </remarks>
public override void ParseAlternativeHits(string[] parts, SAMAlignedItem item)
{
    var countstr = GetOptionValue(parts, "X0:i:", false);
    if (string.IsNullOrEmpty(countstr))
    {
        return;
    }

    // One of the hits counted by X0 is the primary location itself.
    var count = int.Parse(countstr) - 1;
    if (count <= 0)
    {
        return;
    }

    var xaz = GetOptionValue(parts, "XA:Z:", false);
    if (string.IsNullOrEmpty(xaz))
    {
        return;
    }

    // Advance to the next match after each hit. Without this, every iteration
    // would re-read the first entry and add duplicate locations.
    // NOTE(review): assumes _reg matches one XA entry per match - confirm against its pattern.
    var match = _reg.Match(xaz);
    for (var i = 0; i < count && match.Success; i++)
    {
        var loc = new SAMAlignedLocation(item)
        {
            Seqname = match.Groups[1].Value,
            Strand = match.Groups[2].Value[0],
            Start = long.Parse(match.Groups[3].Value)
        };

        // The alternative hit is given the same length as the primary location.
        loc.End = loc.Start + item.Locations[0].Length - 1;
        loc.Cigar = match.Groups[4].Value;
        loc.NumberOfMismatch = int.Parse(match.Groups[5].Value);
        item.AddLocation(loc);

        match = match.NextMatch();
    }
}
/// <summary>
/// Placeholder that has no implementation yet; every call throws.
/// </summary>
/// <param name="list">Candidate aligned locations (unused).</param>
/// <param name="list_2">Candidate mapped miRNA regions (unused).</param>
/// <param name="loc">Never assigned, because the method always throws.</param>
/// <param name="reg">Never assigned, because the method always throws.</param>
/// <exception cref="NotImplementedException">Always thrown.</exception>
private void FindLocation(
    List<SAMAlignedLocation> list,
    List<MappedMirnaRegion> list_2,
    out SAMAlignedLocation loc,
    out MappedMirnaRegion reg)
{
    throw new NotImplementedException();
}
/// <summary>
/// Materialises the queries stored below <paramref name="root"/>: each "query"
/// element under "queries" becomes a <see cref="SAMAlignedItem"/>, and each of its
/// "location" children becomes a <see cref="SAMAlignedLocation"/> owned by that item.
/// </summary>
/// <param name="root">Element holding the "queries" container.</param>
/// <returns>One item per "query" element, in document order.</returns>
public static List<SAMAlignedItem> ToSAMAlignedItems(this XElement root)
{
    var parsed = new List<SAMAlignedItem>();
    var queryElements = root.Element("queries").Elements("query");

    foreach (var q in queryElements)
    {
        var current = new SAMAlignedItem();
        current.Qname = q.Attribute("name").Value;
        current.Sequence = q.Attribute("sequence").Value;
        current.QueryCount = int.Parse(q.Attribute("count").Value);
        current.Sample = q.GetAttributeValue("sample", null);
        parsed.Add(current);

        foreach (var l in q.Elements("location"))
        {
            var hit = new SAMAlignedLocation(current);
            hit.ParseLocation(l);
            hit.Cigar = l.Attribute("cigar").Value;
            hit.AlignmentScore = int.Parse(l.Attribute("score").Value);
            hit.MismatchPositions = l.Attribute("mdz").Value;
            hit.NumberOfMismatch = int.Parse(l.Attribute("nmi").Value);

            // Optional attribute: applied only when it exists.
            var optionalNnpm = l.Attribute("nnpm");
            if (optionalNnpm == null)
            {
                continue;
            }

            hit.NumberOfNoPenaltyMutation = int.Parse(optionalNnpm.Value);
        }
    }

    return parsed;
}