Ejemplo n.º 1
0
    /// <summary>
    /// Adds the aligned characters of <paramref name="item"/> to the per-position counters,
    /// skipping characters whose score is below <paramref name="minScore"/>.
    /// </summary>
    /// <param name="item">Alignment whose sequence and score strings are read through GetSequenceScore.</param>
    /// <param name="count">Amount added to the counter of every accepted character.</param>
    /// <param name="minScore">
    /// Minimum accepted score (score character code minus 33); a value of 0 or less disables the filter.
    /// </param>
    /// <returns>Number of characters skipped because their score was below the minimum.</returns>
    public int AddByScore(SAMAlignedItem item, int count, int minScore)
    {
      string bases, qualities;
      item.GetSequenceScore(out bases, out qualities);

      bool filterByScore = minScore > 0;
      int skipped = 0;

      for (int offset = 0; offset < bases.Length; offset++)
      {
        // The score of a character is its score-string character code minus 33.
        if (filterByScore && (int)(qualities[offset]) - 33 < minScore)
        {
          skipped++;
          continue;
        }

        var symbol = bases[offset];
        var counter = Count[(int)(item.Pos) + offset];

        // Start from the existing tally when the character was seen before at this position.
        int previous;
        counter[symbol] = counter.TryGetValue(symbol, out previous) ? previous + count : count;
      }

      return skipped;
    }
Ejemplo n.º 2
0
 /// <summary>
 /// Checks that BwaFormat reports alternative-hit support and runs ParseAlternativeHits
 /// on one tab-separated line that carries X0:i: and XA:Z: options.
 /// </summary>
 public void TestParseAlternativeHits()
 {
   var bwaformat = new BwaFormat();
   var sam = new SAMAlignedItem();
   Assert.IsTrue(bwaformat.HasAlternativeHits);
   // NOTE(review): the field after the read sequence contains "\[email protected]". "\[" is not a
   // valid C# escape sequence, and the text looks like an e-mail obfuscation artifact that
   // replaced the original "\t" plus the start of the quality string. Presumably the literal
   // should be restored from the upstream source; confirm before relying on this test.
   var parts = "HWI-ST508:275:D2A2JACXX:3:1105:21234:49676\t0\t1_hsa\t564952\t0\t36M\t*\t0\t0\tAGTAAGGTCAGCTAATTAAGCTATCGGGCCCATAAA\[email protected]@?DDFF?FFFDBHIIJIJIJJGJJJJJJJJJJJJJ\tRG:Z:2570-KCV-01-19\tXT:A:R\tNM:i:3\tX0:i:4\tX1:i:0\tXM:i:3\tXO:i:0\tXG:i:0\tMD:Z:15A18C0C0\tXA:Z:M_hsa,+4403,36M,3;17_hsa,+19506660,36M,3;X_hsa,-55206629,36M,3;".Split('\t');
   // No assertion follows: as written, the test only verifies that parsing does not throw.
   bwaformat.ParseAlternativeHits(parts, sam);
 }
Ejemplo n.º 3
0
 /// <summary>
 /// Creates a location with an empty feature list and, when a parent is supplied,
 /// registers the new location with that parent.
 /// </summary>
 /// <param name="parent">Aligned item to register with; may be null, in which case nothing is registered.</param>
 public SAMAlignedLocation(SAMAlignedItem parent)
 {
   Features = new List<ISequenceRegion>();

   if (parent == null)
   {
     return;
   }

   parent.AddLocation(this);
 }
Ejemplo n.º 4
0
    /// <summary>
    /// Adds one alignment to the pending position list (<c>Count</c>) and hands back the
    /// positions that were removed from that list by this call.
    /// </summary>
    /// <param name="item">Alignment to add; its first location supplies the chromosome and start.</param>
    /// <param name="count">Amount added to the counter of every aligned character.</param>
    /// <returns>
    /// The positions removed from the pending list, or null when nothing was removed.
    /// </returns>
    public List<PileupCount> Add(SAMAlignedItem item, int count)
    {
      List<PileupCount> finished = null;

      if (!item.Locations[0].Seqname.Equals(this.Chromosome))
      {
        // Different chromosome: everything pending is handed back.
        finished = Count;
        Count = new List<PileupCount>();
      }
      else if (this.Position != -1)
      {
        if (item.Pos <= this.Count.Last().Position)
        {
          // Overlapping read: only the positions in front of its start are handed back.
          int completed = (int)(item.Pos - this.Position);
          if (completed > 0)
          {
            finished = new List<PileupCount>(Count.Take(completed));
            Count.RemoveRange(0, completed);
          }
        }
        else
        {
          // The read starts after the last pending position: hand back everything.
          finished = Count;
          Count = new List<PileupCount>();
        }
      }

      string aligned, reference;
      item.GetSequences(out aligned, out reference);

      // Append entries until the pending list is as long as the aligned string.
      for (int offset = Count.Count; offset < aligned.Length; offset++)
      {
        var slot = new PileupCount();
        slot.Chromosome = item.Locations[0].Seqname;
        slot.Position = item.Locations[0].Start + offset;
        slot.Reference = reference[offset];
        Count.Add(slot);
      }

      // Tally every aligned character at its offset in the pending list.
      for (int offset = 0; offset < aligned.Length; offset++)
      {
        var symbol = aligned[offset];
        var counter = Count[offset];

        int previous;
        counter[symbol] = counter.TryGetValue(symbol, out previous) ? previous + count : count;
      }

      return finished;
    }
Ejemplo n.º 5
0
    /// <summary>
    /// Checks the aligned and reference strings that GetSequences produces for a location
    /// with cigar "5S18M2D19M5S" and mismatch positions "18^CA10T8".
    /// </summary>
    public void TestGetGetSequences()
    {
      var read = new SAMAlignedItem();

      var location = new SAMAlignedLocation(read);
      location.Cigar = "5S18M2D19M5S";
      location.Start = 39979942;
      location.MismatchPositions = "18^CA10T8";
      location.Sequence = "aaaaaGTAGTACCAACTGTAAGTCCTTATCTTCATACTTTGTaaaaa";
      read.AddLocation(location);

      string aligned, reference;
      read.GetSequences(out aligned, out reference);

      // The aligned string is expected to carry two blanks where the reference has "CA".
      Assert.AreEqual("GTAGTACCAACTGTAAGT  CCTTATCTTCATACTTTGT", aligned);
      Assert.AreEqual("GTAGTACCAACTGTAAGTCACCTTATCTTCTTACTTTGT", reference);
    }
Ejemplo n.º 6
0
    /// <summary>
    /// Reads every "query" element below the next "queries" element of the reader and
    /// converts it, together with its "location" children, into aligned items.
    /// </summary>
    /// <param name="source">Reader positioned before the "queries" element.</param>
    /// <returns>The items read, in document order; empty when no "query" element is found.</returns>
    public static List<SAMAlignedItem> ReadFrom(XmlReader source)
    {
      var items = new List<SAMAlignedItem>();

      source.ReadToFollowing("queries");

      for (bool hasQuery = source.ReadToDescendant("query"); hasQuery; hasQuery = source.ReadToNextSibling("query"))
      {
        var current = new SAMAlignedItem
        {
          Qname = source.GetAttribute("name"),
          Sequence = source.GetAttribute("sequence"),
          QueryCount = int.Parse(source.GetAttribute("count")),
          Sample = source.GetAttribute("sample")
        };
        items.Add(current);

        for (bool hasLocation = source.ReadToDescendant("location"); hasLocation; hasLocation = source.ReadToNextSibling("location"))
        {
          // The location is constructed with the current query as its parent.
          var location = new SAMAlignedLocation(current)
          {
            Seqname = source.GetAttribute("seqname"),
            Start = long.Parse(source.GetAttribute("start")),
            End = long.Parse(source.GetAttribute("end")),
            Strand = source.GetAttribute("strand")[0],
            Cigar = source.GetAttribute("cigar"),
            AlignmentScore = int.Parse(source.GetAttribute("score")),
            MismatchPositions = source.GetAttribute("mdz"),
            NumberOfMismatch = int.Parse(source.GetAttribute("nmi"))
          };

          // "nnpm" is optional; the property keeps its default when the attribute is absent.
          var noPenaltyText = source.GetAttribute("nnpm");
          if (noPenaltyText != null)
          {
            location.NumberOfNoPenaltyMutation = int.Parse(noPenaltyText);
          }
        }
      }

      return items;
    }
Ejemplo n.º 7
0
    /// <summary>
    /// Adds <paramref name="count"/> to the counter of every aligned character of
    /// <paramref name="item"/>, at index <c>item.Pos + offset</c> of <c>Count</c>.
    /// </summary>
    /// <param name="item">Alignment whose aligned string is read through GetSequences.</param>
    /// <param name="count">Amount added per aligned character.</param>
    public void Add(SAMAlignedItem item, int count)
    {
      string aligned, reference;
      item.GetSequences(out aligned, out reference);

      int offset = 0;
      foreach (var symbol in aligned)
      {
        var counter = Count[(int)(item.Pos) + offset];

        // Start from the existing tally when the character was seen before at this position.
        int previous;
        counter[symbol] = counter.TryGetValue(symbol, out previous) ? previous + count : count;

        offset++;
      }
    }
Ejemplo n.º 8
0
    /// <summary>
    /// Exercises PileupCountList.Add with two overlapping reads on chr13 followed by a read
    /// on chr14, checking the positions handed back by each call and the positions that
    /// remain pending afterwards.
    /// </summary>
    public void TestAdd()
    {
      // Read 1: 31 characters, cigar 5S21M5S, starting at 39979942 on chr13.
      SAMAlignedItem item1 = new SAMAlignedItem()
      {
        Sequence = "CTCTTAGATCGATGTGGTGCTCCGGAAAAAA",
      };
      item1.AddLocation(new SAMAlignedLocation(item1)
      {
        Seqname  ="chr13",
        Cigar = "5S21M5S",
        MismatchPositions = "10T10",
        Start = 39979942,
        Sequence = "CTCTTAGATCGATGTGGTGCTCCGGAAAAAA"
      });

      // Read 2: 21 characters, cigar 21M, starting 5 positions after read 1 on chr13.
      SAMAlignedItem item2 = new SAMAlignedItem()
      {
        Sequence = "GATGTAGTGCTCCGGATTTTT"
      };
      item2.AddLocation(new SAMAlignedLocation(item2)
      {
        Seqname = "chr13",
        Cigar = "21M",
        MismatchPositions = "5T15",
        Start = 39979947,
        Sequence = "GATGTAGTGCTCCGGATTTTT"
      });

      // Collects every position handed back during the test (only used by the disabled output below).
      List<PileupCount> all = new List<PileupCount>();

      PileupCountList count = new PileupCountList();
      // The first add is expected to hand back an empty list.
      var res1 = count.Add(item1, 2);
      Assert.AreEqual(0, res1.Count);

      // Adding read 2 is expected to hand back the 5 positions in front of its start.
      var res2 = count.Add(item2, 3);
      Assert.AreEqual(5, res2.Count);
      all.AddRange(res2);

      // Those positions were covered by read 1 only, so each holds read 1's character with count 2.
      for (int i = 0; i < res2.Count; i++)
      {
        Assert.AreEqual(item1.Sequence[5 + i], res2[i].Reference);
        Assert.AreEqual(item1.Sequence[5 + i], res2[i].First().Key);
        Assert.AreEqual(2, res2[i].First().Value);
        Assert.AreEqual(item1.Locations[0].Seqname, res2[i].Chromosome);
        Assert.AreEqual(item1.Pos + i, res2[i].Position);
      }

      // Moving read 1 to another chromosome is expected to hand back all 21 pending positions.
      item1.Locations[0].Seqname = "chr14";
      var res3 = count.Add(item1, 2);
      Assert.AreEqual(21, res3.Count);
      all.AddRange(res3);
      
      // First 16 handed-back positions: expected total 5 (2 + 3), except index 5 where
      // the two reads differ ('G' with count 2, 'A' with count 3, reference 'T').
      for (int i = 0; i < 16; i++)
      {
        if (i == 5)
        {
          Assert.AreEqual('T', res3[i].Reference);
          Assert.True(res3[i].ContainsKey('G'));
          Assert.AreEqual(2, res3[i]['G']);
          Assert.True(res3[i].ContainsKey('A'));
          Assert.AreEqual(3, res3[i]['A']);
        }
        else
        {
          Assert.AreEqual(item2.Sequence[i], res3[i].Reference);
          Assert.AreEqual(item2.Sequence[i], res3[i].First().Key);
          Assert.AreEqual(5, res3[i].First().Value);
        }
      }

      // Last 5 handed-back positions: expected to hold read 2's character with count 3 only.
      for (int i = 16; i < 21; i++)
      {
        Assert.AreEqual(item2.Sequence[i], res3[i].First().Key);
        Assert.AreEqual(3, res3[i].First().Value);
      }

      // The pending list is expected to hold the 21 positions of the chr14 read.
      var res4 = count.Count;
      Assert.AreEqual(21, res4.Count);
      all.AddRange(res4);

      for (int i = 0; i < res4.Count; i++)
      {
        Assert.AreEqual(item1.Sequence[5 + i], res4[i].First().Key);
        Assert.AreEqual(2, res4[i].First().Value);
        Assert.AreEqual(item1.Locations[0].Seqname, res4[i].Chromosome);
        Assert.AreEqual(item1.Pos + i, res4[i].Position);
      }

      // Disabled debug output of all collected positions.
      //all.ForEach(m => Output(m));
    }
Ejemplo n.º 9
0
    /// <summary>
    /// Registers the aligned positions of one alignment and hands back the positions
    /// that are completed by its arrival.
    /// </summary>
    /// <param name="item">Alignment to register; its first location supplies the chromosome.</param>
    /// <returns>
    /// The positions removed from the pending list by this call, or null when the
    /// chromosome matches and no position is pending yet.
    /// </returns>
    public List<AlignedPositionMap> Add(SAMAlignedItem item)
    {
      List<AlignedPositionMap> completed = null;

      if (!item.Locations[0].Seqname.Equals(this.Chromosome))
      {
        // A new chromosome completes every pending position.
        completed = Positions;
        Positions = new List<AlignedPositionMap>();
        PositionMap = new Dictionary<long, AlignedPositionMap>();
      }
      else if (this.Position != -1)
      {
        if (item.Pos <= this.Positions.Last().Position)
        {
          // The alignment overlaps the pending positions: complete only those in front of it.
          completed = new List<AlignedPositionMap>();
          while (Positions[0].Position < item.Pos)
          {
            var head = Positions[0];
            completed.Add(head);
            PositionMap.Remove(head.Position);
            Positions.RemoveAt(0);
          }
        }
        else
        {
          // The alignment starts beyond the last pending position: complete everything.
          completed = Positions;
          Positions = new List<AlignedPositionMap>();
          PositionMap = new Dictionary<long, AlignedPositionMap>();
        }
      }

      foreach (var aligned in item.GetAlignedPositions())
      {
        // Find the entry for this position, creating and indexing it on first use.
        AlignedPositionMap entry;
        if (!PositionMap.TryGetValue(aligned.Position, out entry))
        {
          entry = new AlignedPositionMap();
          entry.Chromosome = item.Locations[0].Seqname;
          entry.Position = aligned.Position;

          Positions.Add(entry);
          PositionMap[entry.Position] = entry;
        }

        // Group the aligned positions of this entry by their aligned event.
        List<AlignedPosition> sameEvent;
        if (!entry.TryGetValue(aligned.AlignedEvent, out sameEvent))
        {
          sameEvent = new List<AlignedPosition>();
          entry[aligned.AlignedEvent] = sameEvent;
        }

        sameEvent.Add(aligned);
      }

      return completed;
    }
Ejemplo n.º 10
0
    /// <summary>
    /// Reads lines from the underlying file until one describes a mapped read whose mapping
    /// quality is at least the configured minimum, and converts that line into an aligned item.
    /// </summary>
    /// <returns>The next accepted aligned item, or null when the file has no more lines.</returns>
    public SAMAlignedItem NextSAMAlignedItem()
    {
      for (string line = _file.ReadLine(); line != null; line = _file.ReadLine())
      {
        var fields = line.Split('\t');

        var name = fields[SAMFormatConst.QNAME_INDEX];
        var bases = fields[SAMFormatConst.SEQ_INDEX];

        // Skip reads flagged as unmapped.
        var flags = (SAMFlags) int.Parse(fields[SAMFormatConst.FLAG_INDEX]);
        if (flags.HasFlag(SAMFlags.UnmappedQuery))
        {
          continue;
        }

        // Skip reads below the minimum mapping quality.
        if (int.Parse(fields[SAMFormatConst.MAPQ_INDEX]) < _options.MinimumReadQuality)
        {
          continue;
        }

        // The item stores the reverse complement of the line's sequence for reverse-strand reads.
        bool onReverseStrand = flags.HasFlag(SAMFlags.QueryOnReverseStrand);
        var aligned = new SAMAlignedItem
        {
          Qname = name,
          Sequence = onReverseStrand ? SequenceUtils.GetReverseComplementedSequence(bases) : bases
        };

        // The location keeps the sequence exactly as it appears in the line.
        var location = new SAMAlignedLocation(aligned);
        location.Seqname = fields[SAMFormatConst.RNAME_INDEX];
        location.Start = int.Parse(fields[SAMFormatConst.POS_INDEX]);
        location.Strand = onReverseStrand ? '-' : '+';
        location.Cigar = fields[SAMFormatConst.CIGAR_INDEX];
        location.MismatchPositions = _format.GetMismatchPositions(fields);
        location.NumberOfMismatch = _format.GetNumberOfMismatch(fields);
        location.Sequence = bases;
        location.Qual = fields[SAMFormatConst.QUAL_INDEX];

        location.ParseEnd(aligned.Sequence);
        aligned.AddLocation(location);

        if (_format.HasAlternativeHits)
        {
          _format.ParseAlternativeHits(fields, aligned);
        }

        return aligned;
      }

      return null;
    }
Ejemplo n.º 11
0
 /// <summary>
 /// Extension point for parsing alternative hits; this base implementation does nothing.
 /// </summary>
 /// <param name="parts">Fields of one input line (callers in this code pass the line split on tabs).</param>
 /// <param name="target">Aligned item handed in by the caller; presumably overrides add the parsed hits to it.</param>
 public virtual void ParseAlternativeHits(string[] parts, SAMAlignedItem target)
 {
 }
Ejemplo n.º 12
0
    /// <summary>
    /// Adds the alternative hits listed in the "XA:Z:" option of a line to
    /// <paramref name="item"/> as additional locations.
    /// </summary>
    /// <remarks>
    /// At most (X0 - 1) hits are read, where X0 is the value of the "X0:i:" option. Nothing
    /// is added when either option is missing or empty, or when X0 - 1 is not positive.
    /// Fewer hits are added when "XA:Z:" contains fewer entries than X0 - 1.
    /// </remarks>
    /// <param name="parts">Fields of one input line, including the optional fields.</param>
    /// <param name="item">Aligned item that receives the alternative locations.</param>
    public override void ParseAlternativeHits(string[] parts, SAMAlignedItem item)
    {
      var countstr = GetOptionValue(parts, "X0:i:", false);
      if (string.IsNullOrEmpty(countstr))
      {
        return;
      }

      var count = int.Parse(countstr) - 1;
      if (count <= 0)
      {
        return;
      }

      var xaz = GetOptionValue(parts, "XA:Z:", false);
      if (string.IsNullOrEmpty(xaz))
      {
        return;
      }

      // Advance to the next match on every iteration so each location comes from its own
      // XA entry, and stop as soon as the entries run out: reading groups of a failed
      // match would yield empty strings and throw on the strand/start parsing below.
      var match = _reg.Match(xaz);
      for (var i = 0; i < count && match.Success; i++, match = match.NextMatch())
      {
        var loc = new SAMAlignedLocation(item)
        {
          Seqname = match.Groups[1].Value,
          Strand = match.Groups[2].Value[0],
          Start = long.Parse(match.Groups[3].Value)
        };
        // The end is derived from the length of the item's first location.
        loc.End = loc.Start + item.Locations[0].Length - 1;
        loc.Cigar = match.Groups[4].Value;
        loc.NumberOfMismatch = int.Parse(match.Groups[5].Value);
        item.AddLocation(loc);
      }
    }
Ejemplo n.º 13
0
    /// <summary>
    /// Streams the reads of the input file, accumulates per-position character counts and
    /// writes every finished position that passes the Fisher exact test and the minimum
    /// alternative allele frequency to the output file, preceded by a VCFv4.2 header.
    /// When <c>options.ExportIgvScript</c> is set, a companion ".igv" script with one
    /// snapshot command block per reported position inside a miRNA region is written too.
    /// </summary>
    /// <returns>An array holding the path of the output file.</returns>
    /// <exception cref="UserTerminatedException">Thrown when cancellation is pending (checked every 100 lines).</exception>
    public override IEnumerable<string> Process()
    {
      PileupCountList pc = new PileupCountList();

      var format = options.GetSAMFormat();

      var cm = new SmallRNACountMap(options.CountFile);

      // Region names are normalised with StringAfter("chr"), the same call applied to the read
      // chromosome names below, so both sides of the lookup use the same key form.
      var srItems = SequenceRegionUtils.GetSequenceRegions(options.CoordinateFile, "miRNA", options.BedAsGtf);
      srItems.ForEach(m =>
      {
        m.Seqname = m.Seqname.StringAfter("chr");
      });
      var srmap = srItems.GroupBy(m => m.Seqname).ToDictionary(m => m.Key, m => m.ToList());

      StreamWriter swScript = null;
      try
      {
        if (options.ExportIgvScript)
        {
          swScript = new StreamWriter(options.OutputFile + ".igv");
          swScript.WriteLine("snapshotDirectory {0}", Path.GetDirectoryName(options.OutputFile).Replace('\\', '/'));
        }

        using (StreamWriter sw = new StreamWriter(options.OutputFile))
        {
          sw.WriteLine(@"##fileformat=VCFv4.2
##fileDate={0:yyyyMMdd}
##source={1}
##phasing=partial
##INFO=<ID=NS,Number=1,Type=Integer,Description=""Number of Samples With Data"">
##INFO=<ID=DP,Number=1,Type=Integer,Description=""Total Depth"">
##INFO=<ID=AF,Number=A,Type=Float,Description=""Allele Frequency"">
##INFO=<ID=FP,Number=1,Type=Float,Description=""Fisher Exact Test P-Value"">
##INFO=<ID=MN,Number=.,Type=String,Description=""miRNA name contains this position"">
##FILTER=<ID=FisherET,Description=""Fisher exact test Pvalue less than {2}"">
##FILTER=<ID=AltAlleFreq,Description=""Alternative allele frequency less than {3}"">
##FILTER=<ID=notMiRNA,Description=""Position not located in miRNA locus"">
##FORMAT=<ID=DP,Number=1,Type=Integer,Description=""Read Depth"">
##FORMAT=<ID=AD,Number=1,Type=Integer,Description=""Allelic Depth"">
#CHROM  POS ID  REF ALT QUAL  FILTER  INFO  FORMAT  {4}",
    DateTime.Now,
    "PileupCountBuilder",
    options.FisherPValue,
    options.MinimumAlternativeAlleleFrequency,
    Path.GetFileNameWithoutExtension(options.InputFile));

          using (var sr = SAMFactory.GetReader(options.InputFile, true))
          {
            int count = 0;
            string line;
            while ((line = sr.ReadLine()) != null)
            {
              count++;

              // Poll for cancellation every 100 lines.
              if (count % 100 == 0)
              {
                if (Progress.IsCancellationPending())
                {
                  throw new UserTerminatedException();
                }
              }

              if (count % 100000 == 0)
              {
                Progress.SetMessage("{0} reads processed", count);
              }

              var parts = line.Split('\t');

              var qname = parts[SAMFormatConst.QNAME_INDEX];
              var seq = parts[SAMFormatConst.SEQ_INDEX];

              //too short
              if (seq.Length < options.MinimumReadLength)
              {
                continue;
              }

              SAMFlags flag = (SAMFlags)int.Parse(parts[SAMFormatConst.FLAG_INDEX]);
              //unmatched
              if (flag.HasFlag(SAMFlags.UnmappedQuery))
              {
                continue;
              }

              var cigar = parts[SAMFormatConst.CIGAR_INDEX];
              //insertion/deletion
              if (cigar.Any(m => m == 'I' || m == 'D'))
              {
                continue;
              }

              var sam = new SAMAlignedItem()
              {
                Qname = qname,
              };

              // The item stores the reverse complement for reverse-strand reads; the location
              // below keeps the sequence exactly as it appears in the line.
              bool isReversed = flag.HasFlag(SAMFlags.QueryOnReverseStrand);
              char strand;
              if (isReversed)
              {
                strand = '-';
                sam.Sequence = SequenceUtils.GetReverseComplementedSequence(seq);
              }
              else
              {
                strand = '+';
                sam.Sequence = seq;
              }

              var loc = new SAMAlignedLocation(sam)
              {
                Seqname = parts[SAMFormatConst.RNAME_INDEX].StringAfter("chr"),
                Start = int.Parse(parts[SAMFormatConst.POS_INDEX]),
                Strand = strand,
                Cigar = parts[SAMFormatConst.CIGAR_INDEX],
                MismatchPositions = format.GetMismatchPositions(parts),
                NumberOfMismatch = format.GetNumberOfMismatch(parts),
                Sequence = seq
              };

              loc.ParseEnd(sam.Sequence);
              sam.AddLocation(loc);

              if (format.HasAlternativeHits)
              {
                format.ParseAlternativeHits(parts, sam);
              }

              // Add the read; positions that can no longer change are handed back.
              var finished = pc.Add(sam, cm.GetCount(sam.Qname));
              if (null == finished || 0 == finished.Count)
              {
                continue;
              }

              foreach (var fin in finished)
              {
                var ft = fin.FisherExactTest();
                if (ft.PValue <= options.FisherPValue)
                {
                  var total = fin.Sum(m => m.Value);
                  var minallele = total * options.MinimumAlternativeAlleleFrequency;
                  if (ft.Sample2.Failed >= minallele)
                  {
                    List<GtfItem> srs;
                    List<string> ranges = new List<string>();

                    // Look the regions up by the chromosome of the finished position, not of
                    // the read that triggered the flush: when the read is the first one on a
                    // new chromosome, the finished positions belong to the previous chromosome.
                    if (srmap.TryGetValue(fin.Chromosome, out srs))
                    {
                      foreach (var seqr in srs)
                      {
                        if (seqr.Contains(fin.Position))
                        {
                          ranges.Add(seqr.GetNameLocation());
                        }
                      }
                    }

                    // Every counted character other than the reference, ordered by character.
                    var alter = (from r in fin
                                 where r.Key != fin.Reference
                                 orderby r.Key
                                 select r).ToList();

                    var str = string.Format("{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}\tNS={7};DP={8};AF={9};FP={10:0.##E0}{11}\tDP:AD\t{12}:{13},{14}",
                      fin.Chromosome,
                      fin.Position,
                      ".",
                      fin.Reference,
                      (from r in alter
                       select r.Key.ToString()).Merge(","),
                      0,
                      ranges.Count == 0 ? "notMiRNA" : "PASS",
                      1,
                      total,
                      (from r in alter
                       select string.Format("{0:0.###}", r.Value * 1.0 / total)).Merge(","),
                      ft.PValue,
                      ranges.Count == 0 ? "" : ";" + ranges.Merge(","),
                      total,
                      ft.Sample2.Succeed,
                      (from r in alter
                       select r.Value.ToString()).Merge(","));

                    sw.WriteLine(str);

                    // Script entries are written only for positions inside at least one region.
                    if (swScript != null && ranges.Count > 0)
                    {
                      swScript.WriteLine(@"goto {0}:{1}
sort position
snapshot {0}_{2}_{1}.png", fin.Chromosome, fin.Position, ranges[0].Replace('(', '_').Replace(')', '_').Replace(':', '_'));
                    }
                  }
                }
              }

              finished.Clear();
            }

            // NOTE(review): positions still pending in pc after the last line are never
            // evaluated in this method, so the tail of the last covered region is not
            // reported. Confirm whether that is intended.
          }
        }
      }
      finally
      {
        if (swScript != null)
        {
          swScript.Close();
        }
      }
      return new string[] { options.OutputFile };
    }
Ejemplo n.º 14
0
 /// <summary>
 /// Converts the "query" children of the "queries" element below <paramref name="root"/>
 /// into aligned items, each with the locations described by its "location" children.
 /// </summary>
 /// <param name="root">Element that contains the "queries" element.</param>
 /// <returns>The converted items, in document order.</returns>
 public static List<SAMAlignedItem> ToSAMAlignedItems(this XElement root)
 {
   var items = new List<SAMAlignedItem>();

   foreach (var queryNode in root.Element("queries").Elements("query"))
   {
     var current = new SAMAlignedItem
     {
       Qname = queryNode.Attribute("name").Value,
       Sequence = queryNode.Attribute("sequence").Value,
       QueryCount = int.Parse(queryNode.Attribute("count").Value),
       // "sample" is optional and falls back to null.
       Sample = queryNode.GetAttributeValue("sample", null)
     };
     items.Add(current);

     foreach (var locationNode in queryNode.Elements("location"))
     {
       // The location is constructed with the current query as its parent.
       var location = new SAMAlignedLocation(current);
       location.ParseLocation(locationNode);
       location.Cigar = locationNode.Attribute("cigar").Value;
       location.AlignmentScore = int.Parse(locationNode.Attribute("score").Value);
       location.MismatchPositions = locationNode.Attribute("mdz").Value;
       location.NumberOfMismatch = int.Parse(locationNode.Attribute("nmi").Value);

       // "nnpm" is optional; the property keeps its default when the attribute is absent.
       var noPenaltyAttribute = locationNode.Attribute("nnpm");
       if (noPenaltyAttribute != null)
       {
         location.NumberOfNoPenaltyMutation = int.Parse(noPenaltyAttribute.Value);
       }
     }
   }

   return items;
 }