Example #1
0
 /// <summary>
 /// Creates a new <see cref="PileupItem"/> whose samples contain only the bases for which
 /// <paramref name="accept"/> returns true. Sample names are carried over; no other property
 /// of the new item is assigned here.
 /// </summary>
 /// <param name="accept">Predicate deciding whether a base is kept.</param>
 /// <returns>The filtered copy; one sample list is produced per entry of <c>Samples</c>, even if it ends up empty.</returns>
 public PileupItem CloneByFilter(Func<PileupBase, bool> accept)
 {
   var filtered = new PileupItem();

   foreach (var original in Samples)
   {
     var copy = new PileupBaseList
     {
       SampleName = original.SampleName
     };

     //The predicate is wrapped in a lambda so it is only invoked while the bases are enumerated.
     copy.AddRange(original.Where(candidate => accept(candidate)));

     filtered.Samples.Add(copy);
   }

   return filtered;
 }
Example #2
0
    /// <summary>
    /// Parses the read base at <paramref name="baseIndex"/> (a match or a mismatch), together with its
    /// score and optional position-in-read, and adds it to <paramref name="pbl"/> when it passes both
    /// the score filter and the terminal filter.
    /// </summary>
    /// <param name="result">Item being built; handed to <c>AssignMatch</c> for matching bases.</param>
    /// <param name="pbl">Base list of the current sample; receives <paramref name="pb"/> when accepted.</param>
    /// <param name="pb">Base to fill in.</param>
    /// <param name="seq">Read-base string of the current sample.</param>
    /// <param name="scores">Score string of the current sample; one character is consumed per call.</param>
    /// <param name="positions">Position-in-read values indexed like <paramref name="scores"/>, or null when unavailable.</param>
    /// <param name="seqLength">Length of <paramref name="seq"/>.</param>
    /// <param name="baseIndex">Index of the base in <paramref name="seq"/>; advanced past the base and an optional trailing '$'.</param>
    /// <param name="scoreIndex">Index into <paramref name="scores"/>; advanced by one.</param>
    private void ParseMatchBase(PileupItem result, PileupBaseList pbl, PileupBase pb, string seq, string scores, string[] positions,
      int seqLength, ref int baseIndex, ref int scoreIndex)
    {
      //The score is the character code minus 33.
      pb.Score = scores[scoreIndex] - 33;
      pb.PositionInRead = positions == null ? string.Empty : positions[scoreIndex];
      scoreIndex++;

      //Only the base whose quality passed the criteria will be parsed.
      bool bScorePassed = _acceptScore(pb);
      if (bScorePassed)
      {
        //A dot stands for a match to the reference base on the forward strand,
        //a comma for a match on the reverse strand.
        switch (seq[baseIndex])
        {
          case '.':
            pb.Strand = StrandType.FORWARD;
            AssignMatch(result, pb);
            break;
          case ',':
            pb.Strand = StrandType.REVERSE;
            AssignMatch(result, pb);
            break;
          default:
            //Any other character is a mismatch; its case tells the strand.
            pb.Strand = char.IsUpper(seq[baseIndex]) ? StrandType.FORWARD : StrandType.REVERSE;
            pb.EventType = AlignedEventType.MISMATCH;
            //Invariant casing: the event is machine-readable data and must not depend on the current culture.
            pb.Event = char.ToUpperInvariant(seq[baseIndex]).ToString();
            break;
        }
      }

      //The base character is consumed even when the score was rejected.
      baseIndex++;

      //is it the end of read?
      if (baseIndex < seqLength && seq[baseIndex] == '$')
      {
        pb.Position = PositionType.END;
        baseIndex++;
      }

      //The terminal filter runs after the end-of-read marker has been applied to pb.Position.
      if (bScorePassed && _acceptTerminal(pb))
      {
        pbl.Add(pb);
      }
    }
Example #3
0
    /// <summary>
    /// Builds a <see cref="PileupItem"/> from the columns of one pileup line, keeping score, strand
    /// and position information for every accepted base.
    /// </summary>
    /// <param name="parts">Columns of the line: [0] sequence identifier, [1] position, [2] reference
    /// nucleotide (first character is used), then one group of <c>_columnEachSample</c> columns per
    /// sample starting at index 3.</param>
    /// <returns>The parsed item, or null when <c>Accept</c> rejects the line or when any sample ends up
    /// with fewer than <c>_minReadDepth</c> bases.</returns>
    /// <exception cref="Exception">The read-base column contains a character this parser does not recognise.</exception>
    public PileupItem GetValue(string[] parts)
    {
      if (!Accept(parts))
      {
        return null;
      }

      var result = new PileupItem
      {
        SequenceIdentifier = parts[0],
        Position = long.Parse(parts[1]),
        Nucleotide = parts[2][0]
      };

      var sampleIndex = 0;
      for (var countIndex = 3; countIndex < parts.Length; countIndex += _columnEachSample)
      {
        var pbl = new PileupBaseList();

        //Within a sample group the first column is not read here; the second holds the read
        //bases, the third the scores and, when present, the fourth the comma-separated positions.
        var seq = parts[countIndex + 1];
        var scores = parts[countIndex + 2];
        string[] positions = null;
        if (_columnEachSample > 3)
        {
          positions = parts[countIndex + 3].Split(',');
        }

        var seqLength = seq.Length;

        var baseIndex = 0;
        var scoreIndex = 0;
        while (baseIndex < seqLength)
        {
          //A '>' or '<' for a reference skip.
          //The deleted bases will be presented as '*' in the following lines.
          //These characters still consume one score, so both indexes advance.
          if (seq[baseIndex] == '>' || seq[baseIndex] == '<' || seq[baseIndex] == '*')
          {
            baseIndex++;
            scoreIndex++;
            continue;
          }

          var pb = new PileupBase();

          //Is it the start of read?
          if (seq[baseIndex] == '^')
          {
            pb.Position = PositionType.START;
            //The character right after '^' carries the read mapping quality (character code minus 33).
            pb.ReadMappingQuality = seq[baseIndex + 1] - 33;
            baseIndex += 2;
            ParseMatchBase(result, pbl, pb, seq, scores, positions, seqLength, ref baseIndex, ref scoreIndex);
          }
          else if (Matches.Contains(seq[baseIndex]))
          {
            pb.Position = PositionType.MIDDLE;
            ParseMatchBase(result, pbl, pb, seq, scores, positions, seqLength, ref baseIndex, ref scoreIndex);
          }
          //A pattern '\+[0-9]+[ACGTNacgtn]+' indicates there is an insertion between this reference position and the next reference position. The length of the insertion is given by the integer in the pattern, followed by the inserted sequence. Similarly, a pattern '-[0-9]+[ACGTNacgtn]+' represents a deletion from the reference.
          else if (seq[baseIndex] == '+' || seq[baseIndex] == '-')
          {
            if (_ignoreInsertionDeletion)
            {
              //ignore and move to next base
              baseIndex++;

              var num = ParseInsertionDeletionCount(seq, seqLength, ref baseIndex);
              baseIndex += num;
            }
            else
            {
              pb.Position = PositionType.MIDDLE;

              var id = seq[baseIndex];
              pb.EventType = id == '+' ? AlignedEventType.INSERTION : AlignedEventType.DELETION;
              baseIndex++;

              //get the sequence of insertion/deletion
              var num = ParseInsertionDeletionCount(seq, seqLength, ref baseIndex);
              var idseq = seq.Substring(baseIndex, num);
              //Invariant casing: the event text is machine-readable data and must not depend on the current culture.
              pb.Event = string.Format("{0}{1}{2}", id, num, idseq.ToUpperInvariant());
              pb.Strand = char.IsUpper(idseq[0]) ? StrandType.FORWARD : StrandType.REVERSE;
              baseIndex += num;

              //Insertions/deletions are added without consuming a score and without the score/terminal filters.
              pbl.Add(pb);
            }

            //An end-of-read marker may follow the insertion/deletion; pb is updated even though it
            //may already be in the list (or may have been ignored).
            if (baseIndex < seqLength && seq[baseIndex] == '$')
            {
              pb.Position = PositionType.END;
              baseIndex++;
            }
          }
          else
          {
            throw new Exception(string.Format("I don't know the mean of character {0}", seq[baseIndex]));
          }
        }

        //A single sample below the minimum depth rejects the whole line.
        if (pbl.Count < _minReadDepth)
        {
          return null;
        }

        sampleIndex++;
        pbl.SampleName = "S" + sampleIndex;
        pbl.InitEventCountList();
        result.Samples.Add(pbl);
      }

      return result;
    }
Example #4
0
    /// <summary>
    /// Parses the read base at <paramref name="baseIndex"/> without strand, score or position
    /// information and always appends it to <paramref name="pbl"/>.
    /// </summary>
    /// <param name="result">Item being built; handed to <c>AssignMatch</c> for matching bases.</param>
    /// <param name="pbl">Base list of the current sample.</param>
    /// <param name="pb">Base to fill in and append.</param>
    /// <param name="seq">Read-base string of the current sample.</param>
    /// <param name="seqLength">Length of <paramref name="seq"/>.</param>
    /// <param name="baseIndex">Index of the base; advanced past the base and an optional trailing '$'.</param>
    private void ParseSlimMatchBase(PileupItem result, PileupBaseList pbl, PileupBase pb, string seq, int seqLength, ref int baseIndex)
    {
      var symbol = seq[baseIndex];

      //A dot or a comma stands for a match to the reference base; the strand is not recorded here.
      if (symbol == '.' || symbol == ',')
      {
        AssignMatch(result, pb);
      }
      else
      {
        pb.EventType = AlignedEventType.MISMATCH;
        pb.Event = symbol.ToString().ToUpper();
      }

      baseIndex++;

      //Skip the end-of-read marker if one follows the base.
      var atEndMarker = baseIndex < seqLength && seq[baseIndex] == '$';
      if (atEndMarker)
      {
        baseIndex++;
      }

      pbl.Add(pb);
    }
Example #5
0
    /// <summary>
    /// Builds a <see cref="PileupItem"/> that carries only match/mismatch events, without score and
    /// position information. Insertions and deletions are skipped.
    /// </summary>
    /// <param name="parts">Columns of the line: [0] sequence identifier, [1] position, [2] reference
    /// nucleotide (first character is used), then one group of <c>_columnEachSample</c> columns per
    /// sample starting at index 3; only the second column of each group is read.</param>
    /// <returns>The parsed item, or null when <c>Accept</c> rejects the line or when any sample ends up
    /// with fewer than <c>_minReadDepth</c> bases.</returns>
    public PileupItem GetSlimValue(string[] parts)
    {
      if (!Accept(parts))
      {
        return null;
      }

      var item = new PileupItem();
      item.SequenceIdentifier = parts[0];
      item.Position = long.Parse(parts[1]);
      item.Nucleotide = parts[2][0];

      var sampleNumber = 0;
      for (var column = 3; column < parts.Length; column += _columnEachSample)
      {
        var bases = new PileupBaseList();

        var readBases = parts[column + 1];
        var length = readBases.Length;

        var cursor = 0;
        while (cursor < length)
        {
          var current = readBases[cursor];

          //Reference skips ('>' or '<') and deleted-base placeholders ('*') carry no event.
          switch (current)
          {
            case '>':
            case '<':
            case '*':
              cursor++;
              continue;
          }

          var pileupBase = new PileupBase();

          //Start of read: skip the '^' and the character that follows it, then parse the base.
          if (current == '^')
          {
            cursor += 2;
            ParseSlimMatchBase(item, bases, pileupBase, readBases, length, ref cursor);
            continue;
          }

          if (Matches.Contains(current))
          {
            ParseSlimMatchBase(item, bases, pileupBase, readBases, length, ref cursor);
            continue;
          }

          if (current != '+' && current != '-')
          {
            throw new Exception(string.Format("I don't know the mean of character {0}", current));
          }

          //Insertion ('+') or deletion ('-'): a count followed by that many bases. Skip the sign,
          //let the helper consume the count, then jump over the inserted/deleted sequence.
          cursor++;
          var skipped = ParseInsertionDeletionCount(readBases, length, ref cursor);
          cursor += skipped;

          //An end-of-read marker may directly follow.
          if (cursor < length && readBases[cursor] == '$')
          {
            cursor++;
          }
        }

        //A single sample below the minimum depth rejects the whole line.
        if (bases.Count < _minReadDepth)
        {
          return null;
        }

        sampleNumber++;
        bases.SampleName = "S" + sampleNumber;
        bases.InitEventCountList();
        item.Samples.Add(bases);
      }

      return item;
    }
Example #6
0
 /// <summary>
 /// Tallies the bases of <paramref name="bases"/> into <paramref name="sample"/>: a base whose event
 /// equals the major event increments <c>Succeed</c>, one whose event equals the minor event
 /// increments <c>Failed</c>; every other base is ignored.
 /// </summary>
 /// <param name="sample">Counter object that is updated in place.</param>
 /// <param name="events">Supplies the major and minor events to compare against.</param>
 /// <param name="bases">Bases to classify.</param>
 public static void PrepareCount(FisherExactTestResult.Sample sample, PairedEvent events, PileupBaseList bases)
 {
   foreach (var pileupBase in bases)
   {
     var observed = pileupBase.Event;

     //The major event is checked first, so it wins if both events are equal.
     if (observed.Equals(events.MajorEvent))
     {
       sample.Succeed++;
       continue;
     }

     if (observed.Equals(events.MinorEvent))
     {
       sample.Failed++;
     }
   }
 }