Ejemplo n.º 1
0
    /// <summary>
    /// Loads the target coverage regions described by <paramref name="options"/>, fills each
    /// region's sequence from the genome FASTA file and assigns a gene symbol to every region.
    /// </summary>
    /// <param name="options">Supplies <c>TargetFile</c>, <c>GenomeFastaFile</c> and <c>RefgeneFile</c>.</param>
    /// <param name="progress">Receives the progress messages emitted while processing.</param>
    /// <param name="removeRegionWithoutSequence">
    /// When true, regions whose <c>Sequence</c> is still null or empty after the FASTA pass
    /// are removed from the returned list.
    /// </param>
    /// <returns>The loaded regions with <c>Sequence</c> and <c>GeneSymbol</c> filled in.</returns>
    public static List<CoverageRegion> GetTargetCoverageRegion(ITargetBuilderOptions options, IProgressCallback progress, bool removeRegionWithoutSequence = true)
    {
      List<CoverageRegion> result;

      // A file extension is not linguistic text: compare ordinally and ignore case so that
      // "targets.XML" is routed to the XML reader as well, independent of the current culture.
      if (options.TargetFile.EndsWith(".xml", System.StringComparison.OrdinalIgnoreCase))
      {
        result = GetTargetCoverageRegionFromXml(options, progress);
      }
      else
      {
        result = GetTargetCoverageRegionFromBed(options, progress);
      }

      // Group regions by sequence name so each FASTA record is matched with a single lookup.
      var regionsBySeqname = result.ToGroupDictionary(m => m.Seqname);

      progress.SetMessage("Filling sequence from {0}...", options.GenomeFastaFile);
      using (var reader = new StreamReader(options.GenomeFastaFile))
      {
        var fastaFormat = new FastaFormat();
        Sequence seq;
        while ((seq = fastaFormat.ReadSequence(reader)) != null)
        {
          progress.SetMessage("Processing chromosome {0} ...", seq.Reference);

          // presumably strips a leading "chr" so names match Seqname - confirm against StringAfter
          var seqname = seq.Name.StringAfter("chr");

          List<CoverageRegion> regions;
          if (!regionsBySeqname.TryGetValue(seqname, out regions))
          {
            continue;
          }

          foreach (var region in regions)
          {
            // Start is treated as 1-based here (Start - 1 is the 0-based offset).
            // NOTE(review): Substring throws if the region extends past the end of the sequence.
            region.Sequence = seq.SeqString.Substring((int)(region.Start - 1), (int)region.Length);

            // NOTE(review): the reverse complement is filled for '+' strand regions only - confirm intended.
            if (region.Strand == '+')
            {
              region.ReverseComplementedSequence = SequenceUtils.GetReverseComplementedSequence(region.Sequence);
            }
          }
        }
      }

      if (removeRegionWithoutSequence)
      {
        result.RemoveAll(l => string.IsNullOrEmpty(l.Sequence));
      }

      progress.SetMessage("Filling sequence finished.");

      // NOTE(review): 1 and 12 look like key/value column indices in the refgene file - confirm against MapReader.
      var namemap = new MapReader(1, 12).ReadFromFile(options.RefgeneFile);
      foreach (var region in result)
      {
        // The lookup key is the part of the region name before "_utr3"; unknown keys get an empty symbol.
        var gene = region.Name.StringBefore("_utr3");
        region.GeneSymbol = namemap.ContainsKey(gene) ? namemap[gene] : string.Empty;
      }

      return result;
    }
Ejemplo n.º 2
0
    /// <summary>
    /// Combines the significant-gene files and the read-group tracking files named in
    /// <c>options</c> into two tables written to
    /// <c>OutputFilePrefix + ".count"</c> and <c>OutputFilePrefix + ".fpkm"</c>.
    /// </summary>
    /// <remarks>
    /// Only genes listed in the significant files (with a name other than "-") are kept, and
    /// genes whose count is "0" in every sample are dropped before output.
    /// </remarks>
    /// <returns>The paths of the count file and the FPKM file, in that order.</returns>
    public override IEnumerable<string> Process()
    {
      this.Progress.SetMessage("Reading group sample map file ...");
      var groupSampleMap = new MapReader(0, 1).ReadFromFile(options.MapFile);

      Dictionary<string, SignificantItem> geneNameMap = new Dictionary<string, SignificantItem>();

      this.Progress.SetMessage("Reading cuffdiff significant files ...");
      // Columns 1..3 (gene, gene name, location) are read below, so a usable line needs at
      // least four tab-separated fields; shorter lines are skipped instead of throwing.
      var sigs = (from file in options.SignificantFiles
                  from line in File.ReadAllLines(file).Skip(1)
                  let parts = line.Split('\t')
                  where parts.Length >= 4
                  let gene = parts[1]
                  let name = parts[2]
                  where !name.Equals("-")
                  let location = parts[3]
                  select new SignificantItem() { Gene = gene, GeneName = name, GeneLocation = location }).ToList();

      // The first occurrence of a gene wins when it appears in several significant files.
      foreach (var sig in sigs)
      {
        if (!geneNameMap.ContainsKey(sig.Gene))
        {
          geneNameMap[sig.Gene] = sig;
        }
      }

      var countFile = options.OutputFilePrefix + ".count";
      var fpkmFile = options.OutputFilePrefix + ".fpkm";

      var items = new List<TrackingItem>();
      foreach (var trackingFile in options.InputFiles)
      {
        this.Progress.SetMessage("Reading cuffdiff read_group_tracking file " + trackingFile + "...");
        using (StreamReader sr = new StreamReader(trackingFile))
        {
          // The first line is a header and is discarded.
          sr.ReadLine();

          string line;
          while ((line = sr.ReadLine()) != null)
          {
            var parts = line.Split('\t');

            // Lines with seven or fewer fields are ignored.
            if (parts.Length <= 7)
            {
              continue;
            }

            var gene = parts[0];
            if (!geneNameMap.ContainsKey(gene))
            {
              continue;
            }

            // Fields 1 and 2 form the lookup key into the group/sample map; an unmapped key
            // is used as the sample name itself.
            var group_index = parts[1] + "_" + parts[2];
            var sample = groupSampleMap.ContainsKey(group_index) ? groupSampleMap[group_index] : group_index;
            var count = parts[3];
            var fpkm = parts[6];
            items.Add(new TrackingItem() { Gene = gene, Sample = sample, Count = count, FPKM = fpkm });
          }
        }
      }

      this.Progress.SetMessage("Preparing result ...");
      var samples = new HashSet<string>(from item in items
                                        select item.Sample).OrderBy(m => m).ToList();
      this.Progress.SetMessage(string.Format("There are {0} samples", samples.Count));

      var genes = new HashSet<string>(from item in items
                                      select item.Gene).OrderBy(m => m).ToList();
      this.Progress.SetMessage(string.Format("There are {0} genes", genes.Count));

      var map = ToDoubleDirectory(items);

      this.Progress.SetMessage("Removing empty genes ...");
      // Iterates the separate gene list, so removing entries from the map here is safe.
      foreach (var gene in genes)
      {
        if (map[gene].All(m => m.Value.Count == "0"))
        {
          map.Remove(gene);
        }
      }

      var finalGenes = map.Keys.OrderBy(m => m).ToList();

      this.Progress.SetMessage("Outputing result ...");
      // Counts are machine-readable text using '.' as the decimal separator; parse and format
      // them with the invariant culture so the result does not depend on the user's locale.
      var invariant = System.Globalization.CultureInfo.InvariantCulture;
      OutputFile(samples, finalGenes, map, geneNameMap, m => Math.Round(double.Parse(m.Count, invariant)).ToString(invariant), countFile);
      OutputFile(samples, finalGenes, map, geneNameMap, m => m.FPKM, fpkmFile);

      this.Progress.End();
      return new[] { countFile, fpkmFile };
    }