コード例 #1
0
ファイル: ParclipUtils.cs プロジェクト: shengqh/CQS.Core
    /// <summary>
    /// Builds one <see cref="CoverageRegion"/> per feature group read from a mapped-feature XML file.
    /// </summary>
    /// <remarks>
    /// For every group only the first item is used and, after dropping its locations without SAM
    /// locations and calling <c>CombineLocationByMappedReads</c>, only its first location.
    /// Every position of the resulting region receives the same coverage value: the sum of the
    /// query counts of the reads mapped to that location.
    /// Groups that are empty, or whose first item has no location with mapped reads, are skipped.
    /// Side effect: the first item of each group is renamed to the "/"-joined names of the group.
    /// </remarks>
    /// <param name="mappedFeatureXmlFile">File read through <c>FeatureItemGroupXmlFormat</c>.</param>
    /// <param name="includeSmallRNATags">Optional name prefixes; when given, groups whose name starts with none of them are dropped.</param>
    /// <param name="excudeSmallRNATags">Optional name prefixes; when given, groups whose name starts with any of them are dropped. (The parameter spelling is kept for caller compatibility.)</param>
    /// <returns>The coverage regions, in the order of the groups they were built from.</returns>
    public static List<CoverageRegion> GetSmallRNACoverageRegion(string mappedFeatureXmlFile, string[] includeSmallRNATags = null, string[] excudeSmallRNATags = null)
    {
      var smallRNAGroups = new FeatureItemGroupXmlFormat().ReadFromFile(mappedFeatureXmlFile);

      // NOTE(review): both predicates below test the group's name (m.Name) and never use the item (l),
      // so a group is either emptied completely or left untouched - confirm that per-item filtering
      // (l.Name) was not intended.
      if (includeSmallRNATags != null && includeSmallRNATags.Length > 0)
      {
        smallRNAGroups.ForEach(m => m.RemoveAll(l => includeSmallRNATags.All(k => !m.Name.StartsWith(k, System.StringComparison.Ordinal))));
        smallRNAGroups.RemoveAll(m => m.Count == 0);
      }

      if (excudeSmallRNATags != null && excudeSmallRNATags.Length > 0)
      {
        smallRNAGroups.ForEach(m => m.RemoveAll(l => excudeSmallRNATags.Any(k => m.Name.StartsWith(k, System.StringComparison.Ordinal))));
        smallRNAGroups.RemoveAll(m => m.Count == 0);
      }

      var result = new List<CoverageRegion>();
      foreach (var sg in smallRNAGroups)
      {
        // Empty groups are only pruned above when a tag filter was supplied, so guard here as well.
        if (sg.Count == 0)
        {
          continue;
        }

        // The items of a group share the same reads, so only the first one is used.
        var smallRNA = sg[0];
        smallRNA.Name = (from g in sg select g.Name).Merge("/");

        smallRNA.Locations.RemoveAll(m => m.SamLocations.Count == 0);

        // Without any mapped location there is nothing to build a region from.
        if (smallRNA.Locations.Count == 0)
        {
          continue;
        }

        smallRNA.CombineLocationByMappedReads();

        // Only the first location is used.
        var loc = smallRNA.Locations[0];

        var rg = new CoverageRegion
        {
          Name = smallRNA.Name,
          Seqname = loc.Seqname,
          Start = loc.Start,
          End = loc.End,
          Strand = loc.Strand,
          Sequence = loc.Sequence
        };

        // Every position gets the same coverage: the total query count of the mapped reads.
        var coverage = (from sloc in loc.SamLocations select sloc.SamLocation.Parent.QueryCount).Sum();

        for (int i = 0; i < loc.Length; i++)
        {
          rg.Coverages.Add(coverage);
        }

        result.Add(rg);
      }

      return result;
    }
コード例 #2
0
    /// <summary>
    /// Loads feature groups from a count XML file, keeps only the reads flagged with a
    /// no-penalty mutation and assigns a p-value to every remaining location.
    /// </summary>
    /// <param name="countXmlFile">File read through <c>FeatureItemGroupXmlFormat</c>.</param>
    /// <returns>The groups that still contain at least one item after filtering.</returns>
    public List<FeatureItemGroup> Build(string countXmlFile)
    {
      var groups = new FeatureItemGroupXmlFormat().ReadFromFile(countXmlFile);

      Progress.SetMessage("There are {0} groups in {1}", groups.Count, countXmlFile);

      // Remember each location's query count before any read is filtered out.
      foreach (var grp in groups)
      {
        foreach (var feature in grp)
        {
          foreach (var location in feature.Locations)
          {
            location.QueryCountBeforeFilter = location.QueryCount;
          }
        }
      }

      // When no read carries a no-penalty mutation count yet, derive it here from T->C mismatches.
      var anyNoPenaltyMutation = groups.Any(grp =>
        grp.Any(feature =>
          feature.Locations.Any(location =>
            location.SamLocations.Any(sam => sam.NumberOfNoPenaltyMutation != 0))));

      if (!anyNoPenaltyMutation)
      {
        foreach (var grp in groups)
        {
          foreach (var feature in grp)
          {
            feature.Locations.RemoveAll(location => location.SamLocations.Count == 0);

            foreach (var location in feature.Locations)
            {
              foreach (var sam in location.SamLocations)
              {
                var mismatch = sam.SamLocation.GetNotGsnapMismatch(sam.SamLocation.Parent.Sequence);
                if (mismatch != null && mismatch.IsMutation('T', 'C'))
                {
                  // A T->C mismatch is not penalised: take it out of the mismatch count.
                  sam.NumberOfMismatch = sam.SamLocation.NumberOfMismatch - 1;
                  sam.NumberOfNoPenaltyMutation = 1;
                }
                else
                {
                  sam.NumberOfMismatch = sam.SamLocation.NumberOfMismatch;
                  sam.NumberOfNoPenaltyMutation = 0;
                }
              }
            }
          }
        }
      }

      // Prune bottom-up: reads without a no-penalty mutation, then locations, items and groups left empty.
      foreach (var grp in groups)
      {
        foreach (var feature in grp)
        {
          foreach (var location in feature.Locations)
          {
            location.SamLocations.RemoveAll(sam => sam.NumberOfNoPenaltyMutation == 0);
          }

          feature.Locations.RemoveAll(location => location.SamLocations.Count == 0);
        }

        grp.RemoveAll(feature => feature.Locations.Count == 0);
      }

      groups.RemoveAll(grp => grp.Count == 0);

      Progress.SetMessage("There are {0} groups having T2C mutation", groups.Count);

      // Score every surviving location from its query counts before and after filtering.
      foreach (var grp in groups)
      {
        foreach (var feature in grp)
        {
          foreach (var location in feature.Locations)
          {
            location.PValue = CalculateT2CPvalue(location.QueryCountBeforeFilter, location.QueryCount, this.t2cRate);
          }
        }
      }

      return groups;
    }