Пример #1
0
 /// <summary>
 /// Reads <paramref name="countFile"/> through a <c>MapReader(0, 1)</c> and stores
 /// every entry in <c>Counts</c>, with the value parsed as an integer.
 /// </summary>
 /// <param name="countFile">Path of the file handed to the map reader.</param>
 protected virtual void ReadCountFile(string countFile)
 {
   var reader = new MapReader(0, 1);
   foreach (var entry in reader.ReadFromFile(countFile))
   {
     // int.Parse throws when a value is not a valid integer.
     Counts[entry.Key] = int.Parse(entry.Value);
   }
 }
Пример #2
0
 // Reads the BRCA SDRF fixture and checks the entry count plus the barcode
 // mapped to each of the four segment files of one sample.
 public void TestRead()
 {
   const string expectedBarcode = "TCGA-A8-A06R-01A-11D-A011-01";

   var reader = new MapReader("Derived Array Data File", "Comment [Aliquot Barcode]");
   var map = reader.ReadFromFile("../../../data/broad.mit.edu_BRCA.Genome_Wide_SNP_6.sdrf.txt");

   Assert.AreEqual(6789, map.Count);

   var segFiles = new[]
   {
     "CUSKS_p_TCGAb47_SNP_1N_GenomeWideSNP_6_A02_628278.hg18.seg.txt",
     "CUSKS_p_TCGAb47_SNP_1N_GenomeWideSNP_6_A02_628278.hg19.seg.txt",
     "CUSKS_p_TCGAb47_SNP_1N_GenomeWideSNP_6_A02_628278.nocnv_hg18.seg.txt",
     "CUSKS_p_TCGAb47_SNP_1N_GenomeWideSNP_6_A02_628278.nocnv_hg19.seg.txt"
   };
   foreach (var segFile in segFiles)
   {
     Assert.AreEqual(expectedBarcode, map[segFile]);
   }
 }
Пример #3
0
    /// <summary>
    /// Reads <paramref name="sdrfFile"/> with a <c>MapReader</c> built from the two
    /// column names and fills <c>maps</c> with lower-cased key -> sample barcode
    /// (as returned by <c>TCGAUtils.GetSampleBarCode</c>).
    /// </summary>
    /// <param name="sdrfFile">Path of the SDRF file to read.</param>
    /// <param name="fileColumn">Column passed to the reader as the key column.</param>
    /// <param name="barCodeColumn">Column passed to the reader as the value column.</param>
    public FindParticipantBySdrfFile(string sdrfFile, string fileColumn, string barCodeColumn)
    {
      this.sdrfFile = sdrfFile;

      var reader = new MapReader(fileColumn, barCodeColumn);
      var entries = reader.ReadFromFile(sdrfFile);

      maps = new Dictionary<string, string>();
      foreach (var entry in entries)
      {
        // Entries keyed by the "->" placeholder are skipped.
        if (!entry.Key.Equals("->"))
        {
          maps[entry.Key.ToLower()] = TCGAUtils.GetSampleBarCode(entry.Value);
        }
      }
    }
Пример #4
0
    /// <summary>
    /// Reads the "geo_accn" -> "er" mapping from <c>GSE2990_suppl_info.txt</c> inside
    /// <paramref name="datasetDirectory"/> and, for every accession that has a matching
    /// <c>.cel</c> file in that directory, sets the ER value on the corresponding entry
    /// of <paramref name="sampleMap"/>, creating the entry when it does not exist yet.
    /// </summary>
    /// <param name="datasetDirectory">Directory holding the supplementary info file and the cel files.</param>
    /// <param name="sampleMap">Sample items keyed by accession; updated in place.</param>
    public void ParseDataset(string datasetDirectory, Dictionary<string, BreastCancerSampleItem> sampleMap)
    {
      var dirname = Path.GetFileName(datasetDirectory);

      // Path.Combine instead of concatenating "\\": works with the platform's
      // separator and does not double it when the directory already ends with one.
      var infoFile = Path.Combine(datasetDirectory, "GSE2990_suppl_info.txt");

      var map = new MapReader("geo_accn", "er").ReadFromFile(infoFile);
      foreach (var m in map)
      {
        var celFile = Path.Combine(datasetDirectory, m.Key + ".cel");
        if (!File.Exists(celFile))
        {
          // Accessions without a cel file on disk are ignored.
          continue;
        }

        BreastCancerSampleItem item;
        if (!sampleMap.TryGetValue(m.Key, out item))
        {
          item = new BreastCancerSampleItem(dirname, m.Key);
          sampleMap[m.Key] = item;
        }

        item.ER = m.Value;
      }
    }
Пример #5
0
    /// <summary>
    /// Excludes every existing file whose chip type, as listed in the chip type table,
    /// is one of <paramref name="removeChipTypes"/>.
    /// </summary>
    /// <param name="chipTypeTableFileName">Chip type table file; passed through <c>CheckFileName</c> before it is read.</param>
    /// <param name="removeChipTypes">Chip types whose files should be excluded.</param>
    public void FilterChipType(string chipTypeTableFileName, string[] removeChipTypes)
    {
      chipTypeTableFileName = CheckFileName(chipTypeTableFileName);

      var chipTypeByCelFile = new MapReader(0, 1).ReadFromFile(chipTypeTableFileName);
      foreach (var entry in chipTypeByCelFile)
      {
        var celfile = entry.Key;
        var chiptype = entry.Value;

        // The file system is only consulted for chip types that are to be removed.
        if (!removeChipTypes.Contains(chiptype) || !File.Exists(celfile))
        {
          continue;
        }

        Console.WriteLine("Removing {0} : {1}", chiptype, celfile);

        ExcludeFile(celfile);
      }
    }
    /// <summary>
    /// Groups the items of the GTF input file by gene id and writes one line per gene
    /// (gene id, gene name, summed length and, when any item carries it, gene biotype)
    /// to the output file.
    /// </summary>
    /// <remarks>
    /// Per gene, exon items take precedence: once an exon is seen, non-exon items are
    /// dropped and no further non-exon items are added. The gene name comes from the
    /// first item with a <c>gene_name</c> attribute, otherwise from the optional
    /// id/name map file, otherwise the gene id itself is used.
    /// </remarks>
    /// <returns>A single-element array holding the output file path.</returns>
    /// <exception cref="Exception">
    /// No item has a <c>gene_name</c> attribute and the map file does not exist.
    /// </exception>
    public override IEnumerable<string> Process()
    {
      var map = new Dictionary<string, List<GtfItem>>();

      var namemap = new Dictionary<string, string>();
      bool hasMapFile = File.Exists(options.MapFile);
      if (hasMapFile)
      {
        namemap = new MapReader(0, 1, hasHeader: false).ReadFromFile(options.MapFile);
      }

      using (var gtf = new GtfItemFile(options.InputFile))
      {
        GtfItem item;
        int count = 0;
        while ((item = gtf.Next()) != null)
        {
          count++;
          if ((count % 100000) == 0)
          {
            Progress.SetMessage("{0} gtf item processed", count);
          }

          List<GtfItem> oldItems;
          if (!map.TryGetValue(item.GeneId, out oldItems))
          {
            map[item.GeneId] = new List<GtfItem> { item };
            continue;
          }

          if (IsExon(item))
          {
            // Exons replace whatever non-exon items were collected so far.
            oldItems.RemoveAll(m => !IsExon(m));
            oldItems.Add(item);
          }
          else if (oldItems.All(m => !IsExon(m)))
          {
            // Non-exon items are only kept while the gene has no exon yet.
            oldItems.Add(item);
          }
        }
      }

      // Validate before the output file is opened, so that a failing run does not
      // leave behind an empty (or truncated) output file.
      bool bHasGeneName = map.Values.Any(l => l.Any(m => m.Attributes.Contains("gene_name")));
      if (!bHasGeneName && !hasMapFile)
      {
        throw new Exception(string.Format("No gene_name found in {0} and no id/name map file defined.", options.InputFile));
      }

      bool bHasGeneBiotype = map.Values.Any(l => l.Any(m => m.Attributes.Contains("gene_biotype")));

      var keys = (from key in map.Keys
                  orderby key
                  select key).ToList();

      using (StreamWriter sw = new StreamWriter(options.OutputFile))
      {
        if (bHasGeneBiotype)
        {
          sw.WriteLine("gene_id\tgene_name\tlength\tgene_biotype");
        }
        else
        {
          sw.WriteLine("gene_id\tgene_name\tlength");
        }

        foreach (var key in keys)
        {
          var gtfs = map[key];

          // Pick the naming item before CombineCoordinates() is applied to the list,
          // exactly as the lookup order has always been.
          var gtf = gtfs.FirstOrDefault(m => m.Attributes.Contains("gene_name"));
          gtfs.CombineCoordinates();

          string name;
          string biotype;
          if (gtf == null)
          {
            biotype = string.Empty;
            if (!namemap.TryGetValue(key, out name))
            {
              name = key;
            }
          }
          else
          {
            biotype = gtf.GetBiotype();
            name = gtf.Attributes.StringAfter("gene_name \"").StringBefore("\"");
          }

          if (bHasGeneBiotype)
          {
            sw.WriteLine("{0}\t{1}\t{2}\t{3}", key, name, gtfs.Sum(m => m.Length), biotype);
          }
          else
          {
            sw.WriteLine("{0}\t{1}\t{2}", key, name, gtfs.Sum(m => m.Length));
          }
        }
      }

      return new string[] { options.OutputFile };
    }
Пример #7
0
    /// <summary>
    /// Loads the target regions (from XML when the target file name ends with ".xml",
    /// otherwise from BED), fills their sequence from the genome fasta file and
    /// assigns a gene symbol looked up in the refgene file.
    /// </summary>
    /// <param name="options">Supplies the target, genome fasta and refgene file names.</param>
    /// <param name="progress">Receives progress messages.</param>
    /// <param name="removeRegionWithoutSequence">
    /// When true, regions whose sequence is still null or empty after the fasta pass are removed.
    /// </param>
    /// <returns>The list of regions with sequence and gene symbol filled in.</returns>
    public static List<CoverageRegion> GetTargetCoverageRegion(ITargetBuilderOptions options, IProgressCallback progress, bool removeRegionWithoutSequence = true)
    {
      List<CoverageRegion> result = options.TargetFile.EndsWith(".xml")
        ? GetTargetCoverageRegionFromXml(options, progress)
        : GetTargetCoverageRegionFromBed(options, progress);

      var regionsBySeqname = result.ToGroupDictionary(m => m.Seqname);

      progress.SetMessage("Filling sequence from {0}...", options.GenomeFastaFile);
      using (var sr = new StreamReader(options.GenomeFastaFile))
      {
        var ff = new FastaFormat();
        for (Sequence seq = ff.ReadSequence(sr); seq != null; seq = ff.ReadSequence(sr))
        {
          progress.SetMessage("Processing chromosome {0} ...", seq.Reference);

          // Regions are keyed by the part of the fasta sequence name after "chr".
          List<CoverageRegion> regions;
          if (!regionsBySeqname.TryGetValue(seq.Name.StringAfter("chr"), out regions))
          {
            continue;
          }

          foreach (var region in regions)
          {
            // Start is used as 1-based here: the substring begins at Start - 1.
            region.Sequence = seq.SeqString.Substring((int)(region.Start - 1), (int)region.Length);

            // NOTE(review): the reverse complement is stored for '+' strand regions only;
            // confirm this is the intended strand.
            if (region.Strand == '+')
            {
              region.ReverseComplementedSequence = SequenceUtils.GetReverseComplementedSequence(region.Sequence);
            }
          }
        }
      }

      if (removeRegionWithoutSequence)
      {
        result.RemoveAll(l => string.IsNullOrEmpty(l.Sequence));
      }

      progress.SetMessage("Filling sequence finished.");

      var namemap = new MapReader(1, 12).ReadFromFile(options.RefgeneFile);
      foreach (var region in result)
      {
        // The lookup key is the region name up to "_utr3"; unknown genes get an empty symbol.
        var gene = region.Name.StringBefore("_utr3");
        string symbol;
        region.GeneSymbol = namemap.TryGetValue(gene, out symbol) ? symbol : string.Empty;
      }

      return result;
    }
    /// <summary>
    /// Combines cuffdiff read_group_tracking files into a count table and an FPKM table,
    /// restricted to the genes listed in the cuffdiff significant files.
    /// </summary>
    /// <remarks>
    /// Output goes to <c>OutputFilePrefix + ".count"</c> and <c>OutputFilePrefix + ".fpkm"</c>.
    /// Sample names are resolved through the group/sample map file, keyed by
    /// "column1_column2" of the tracking file; unmapped keys are used as sample names as-is.
    /// Genes whose count is "0" in every sample are dropped.
    /// </remarks>
    /// <returns>The count file path followed by the FPKM file path.</returns>
    public override IEnumerable<string> Process()
    {
      this.Progress.SetMessage("Reading group sample map file ...");
      var groupSampleMap = new MapReader(0, 1).ReadFromFile(options.MapFile);

      var geneNameMap = new Dictionary<string, SignificantItem>();

      this.Progress.SetMessage("Reading cuffdiff significant files ...");
      var sigs = (from file in options.SignificantFiles
                  from line in File.ReadAllLines(file).Skip(1)
                  let parts = line.Split('\t')
                  // parts[3] is read below, so at least four columns are required;
                  // shorter lines are skipped instead of throwing IndexOutOfRangeException.
                  where parts.Length >= 4
                  let gene = parts[1]
                  let name = parts[2]
                  where !name.Equals("-")
                  let location = parts[3]
                  select new SignificantItem() { Gene = gene, GeneName = name, GeneLocation = location }).ToList();
      foreach (var sig in sigs)
      {
        // The first occurrence of a gene wins.
        if (!geneNameMap.ContainsKey(sig.Gene))
        {
          geneNameMap[sig.Gene] = sig;
        }
      }

      var countFile = options.OutputFilePrefix + ".count";
      var fpkmFile = options.OutputFilePrefix + ".fpkm";

      var items = new List<TrackingItem>();
      foreach (var trackingFile in options.InputFiles)
      {
        this.Progress.SetMessage("Reading cuffdiff read_group_tracking file " + trackingFile + "...");
        using (StreamReader sr = new StreamReader(trackingFile))
        {
          // The first line is the header and is discarded.
          string line = sr.ReadLine();
          while ((line = sr.ReadLine()) != null)
          {
            var parts = line.Split('\t');
            if (parts.Length <= 7)
            {
              continue;
            }

            var gene = parts[0];
            if (!geneNameMap.ContainsKey(gene))
            {
              // Only genes present in the significant files are kept.
              continue;
            }

            var group_index = parts[1] + "_" + parts[2];
            string sample;
            if (!groupSampleMap.TryGetValue(group_index, out sample))
            {
              sample = group_index;
            }

            items.Add(new TrackingItem() { Gene = gene, Sample = sample, Count = parts[3], FPKM = parts[6] });
          }
        }
      }

      this.Progress.SetMessage("Preparing result ...");
      var samples = new HashSet<string>(from item in items
                                        select item.Sample).OrderBy(m => m).ToList();
      this.Progress.SetMessage(string.Format("There are {0} samples", samples.Count));

      var genes = new HashSet<string>(from item in items
                                      select item.Gene).OrderBy(m => m).ToList();
      this.Progress.SetMessage(string.Format("There are {0} genes", genes.Count));

      var map = ToDoubleDirectory(items);

      this.Progress.SetMessage("Removing empty genes ...");
      // Iterates the separate gene list, so removing from the map here is safe.
      foreach (var gene in genes)
      {
        if (map[gene].All(m => m.Value.Count == "0"))
        {
          map.Remove(gene);
        }
      }

      var finalGenes = map.Keys.OrderBy(m => m).ToList();

      this.Progress.SetMessage("Outputing result ...");
      // The counts come from a machine-written file, so they are parsed and formatted
      // with the invariant culture rather than whatever culture the host runs under.
      var invariant = System.Globalization.CultureInfo.InvariantCulture;
      OutputFile(samples, finalGenes, map, geneNameMap, m => Math.Round(double.Parse(m.Count, invariant)).ToString(invariant), countFile);
      OutputFile(samples, finalGenes, map, geneNameMap, m => m.FPKM, fpkmFile);

      this.Progress.End();
      return new[] { countFile, fpkmFile };
    }